Mirror of https://github.com/clinton-hall/nzbToMedia.git (synced 2025-08-14 18:47:09 -07:00)
Update vendored guessit to 3.1.1
Updates python-dateutil to 2.8.2. Updates rebulk to 2.0.1.
Parent: ebc9718117
Commit: 2226a74ef8
66 changed files with 2995 additions and 1306 deletions
Binary file not shown.
@@ -1,4 +1,5 @@
 # coding: utf-8
 # file generated by setuptools_scm
 # don't change, don't track in version control
-version = '2.7.5'
+version = '2.8.2'
+version_tuple = (2, 8, 2)
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-This module offers a generic easter computing method for any given year, using
+This module offers a generic Easter computing method for any given year, using
 Western, Orthodox or Julian algorithms.
 """
@@ -21,15 +21,15 @@ def easter(year, method=EASTER_WESTERN):
     quoted in "Explanatory Supplement to the Astronomical
     Almanac", P. Kenneth Seidelmann, editor.

-    This algorithm implements three different easter
+    This algorithm implements three different Easter
     calculation methods:

-    1 - Original calculation in Julian calendar, valid in
-        dates after 326 AD
-    2 - Original method, with date converted to Gregorian
-        calendar, valid in years 1583 to 4099
-    3 - Revised method, in Gregorian calendar, valid in
-        years 1583 to 4099 as well
+    1. Original calculation in Julian calendar, valid in
+       dates after 326 AD
+    2. Original method, with date converted to Gregorian
+       calendar, valid in years 1583 to 4099
+    3. Revised method, in Gregorian calendar, valid in
+       years 1583 to 4099 as well

     These methods are represented by the constants:
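For context, a minimal sketch of the Easter API this docstring documents (the method constants come from the docstring above; output dates assume a recent dateutil):

    from dateutil.easter import easter, EASTER_JULIAN, EASTER_ORTHODOX, EASTER_WESTERN

    print(easter(2024))                   # datetime.date(2024, 3, 31) -- Western, the default
    print(easter(2024, EASTER_ORTHODOX))  # datetime.date(2024, 5, 5)  -- converted to Gregorian
    print(easter(2024, EASTER_JULIAN))    # datetime.date(2024, 4, 22) -- date in the Julian calendar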
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-from ._parser import parse, parser, parserinfo
+from ._parser import parse, parser, parserinfo, ParserError
 from ._parser import DEFAULTPARSER, DEFAULTTZPARSER
 from ._parser import UnknownTimezoneWarning
@@ -9,6 +9,7 @@ from .isoparser import isoparser, isoparse

 __all__ = ['parse', 'parser', 'parserinfo',
            'isoparse', 'isoparser',
+           'ParserError',
            'UnknownTimezoneWarning']
@@ -20,11 +20,11 @@ value falls back to the end of the month.
 Additional resources about date/time string formats can be found below:

 - `A summary of the international standard date and time notation
-  <http://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_
-- `W3C Date and Time Formats <http://www.w3.org/TR/NOTE-datetime>`_
+  <https://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_
+- `W3C Date and Time Formats <https://www.w3.org/TR/NOTE-datetime>`_
 - `Time Formats (Planetary Rings Node) <https://pds-rings.seti.org:443/tools/time_formats.html>`_
 - `CPAN ParseDate module
-  <http://search.cpan.org/~muir/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_
+  <https://metacpan.org/pod/release/MUIR/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_
 - `Java SimpleDateFormat Class
   <https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_
 """
@@ -40,7 +40,7 @@ from calendar import monthrange
 from io import StringIO

 import six
-from six import binary_type, integer_types, text_type
+from six import integer_types, text_type

 from decimal import Decimal
@@ -49,7 +49,7 @@ from warnings import warn
 from .. import relativedelta
 from .. import tz

-__all__ = ["parse", "parserinfo"]
+__all__ = ["parse", "parserinfo", "ParserError"]


 # TODO: pandas.core.tools.datetimes imports this explicitly. Might be worth
@@ -60,14 +60,8 @@ class _timelex(object):
     _split_decimal = re.compile("([.,])")

     def __init__(self, instream):
-        if six.PY2:
-            # In Python 2, we can't duck type properly because unicode has
-            # a 'decode' function, and we'd be double-decoding
-            if isinstance(instream, (binary_type, bytearray)):
-                instream = instream.decode()
-        else:
-            if getattr(instream, 'decode', None) is not None:
-                instream = instream.decode()
+        if isinstance(instream, (bytes, bytearray)):
+            instream = instream.decode()

         if isinstance(instream, text_type):
             instream = StringIO(instream)
@@ -291,7 +285,7 @@ class parserinfo(object):
            ("s", "second", "seconds")]
     AMPM = [("am", "a"),
             ("pm", "p")]
-    UTCZONE = ["UTC", "GMT", "Z"]
+    UTCZONE = ["UTC", "GMT", "Z", "z"]
     PERTAIN = ["of"]
     TZOFFSET = {}
     # TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate",
@@ -388,7 +382,8 @@ class parserinfo(object):
         if res.year is not None:
             res.year = self.convertyear(res.year, res.century_specified)

-        if res.tzoffset == 0 and not res.tzname or res.tzname == 'Z':
+        if ((res.tzoffset == 0 and not res.tzname) or
+                (res.tzname == 'Z' or res.tzname == 'z')):
             res.tzname = "UTC"
             res.tzoffset = 0
         elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
@@ -422,7 +417,7 @@ class _ymd(list):
         elif not self.has_month:
             return 1 <= value <= 31
         elif not self.has_year:
-            # Be permissive, assume leapyear
+            # Be permissive, assume leap year
             month = self[self.mstridx]
             return 1 <= value <= monthrange(2000, month)[1]
         else:
@@ -538,7 +533,7 @@ class _ymd(list):
             year, month, day = self
         else:
             # 01-Jan-01
-            # Give precendence to day-first, since
+            # Give precedence to day-first, since
             # two-digit years is usually hand-written.
             day, month, year = self
@@ -625,7 +620,7 @@ class parser(object):
             first element being a :class:`datetime.datetime` object, the second
             a tuple containing the fuzzy tokens.

-        :raises ValueError:
+        :raises ParserError:
             Raised for invalid or unknown string format, if the provided
             :class:`tzinfo` is not in a valid format, or if an invalid date
             would be created.
@@ -645,12 +640,15 @@ class parser(object):
         res, skipped_tokens = self._parse(timestr, **kwargs)

         if res is None:
-            raise ValueError("Unknown string format:", timestr)
+            raise ParserError("Unknown string format: %s", timestr)

         if len(res) == 0:
-            raise ValueError("String does not contain a date:", timestr)
+            raise ParserError("String does not contain a date: %s", timestr)

-        ret = self._build_naive(res, default)
+        try:
+            ret = self._build_naive(res, default)
+        except ValueError as e:
+            six.raise_from(ParserError(str(e) + ": %s", timestr), e)

         if not ignoretz:
             ret = self._build_tzaware(ret, res, tzinfos)
@@ -1021,7 +1019,7 @@ class parser(object):
             hms_idx = idx + 2

         elif idx > 0 and info.hms(tokens[idx-1]) is not None:
-            # There is a "h", "m", or "s" preceeding this token. Since neither
+            # There is a "h", "m", or "s" preceding this token. Since neither
             # of the previous cases was hit, there is no label following this
             # token, so we use the previous label.
             # e.g. the "04" in "12h04"
@@ -1060,7 +1058,8 @@ class parser(object):
                     tzname is None and
                     tzoffset is None and
                     len(token) <= 5 and
-                    all(x in string.ascii_uppercase for x in token))
+                    (all(x in string.ascii_uppercase for x in token)
+                     or token in self.info.UTCZONE))

     def _ampm_valid(self, hour, ampm, fuzzy):
         """
@@ -1100,7 +1099,7 @@ class parser(object):
     def _parse_min_sec(self, value):
         # TODO: Every usage of this function sets res.second to the return
         # value. Are there any cases where second will be returned as None and
-        # we *dont* want to set res.second = None?
+        # we *don't* want to set res.second = None?
         minute = int(value)
         second = None
@@ -1109,14 +1108,6 @@ class parser(object):
             second = int(60 * sec_remainder)
         return (minute, second)

-    def _parsems(self, value):
-        """Parse a I[.F] seconds value into (seconds, microseconds)."""
-        if "." not in value:
-            return int(value), 0
-        else:
-            i, f = value.split(".")
-            return int(i), int(f.ljust(6, "0")[:6])
-
     def _parse_hms(self, idx, tokens, info, hms_idx):
         # TODO: Is this going to admit a lot of false-positives for when we
         # just happen to have digits and "h", "m" or "s" characters in non-date
@@ -1135,21 +1126,35 @@ class parser(object):

         return (new_idx, hms)

-    def _recombine_skipped(self, tokens, skipped_idxs):
-        """
-        >>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"]
-        >>> skipped_idxs = [0, 1, 2, 5]
-        >>> _recombine_skipped(tokens, skipped_idxs)
-        ["foo bar", "baz"]
-        """
-        skipped_tokens = []
-        for i, idx in enumerate(sorted(skipped_idxs)):
-            if i > 0 and idx - 1 == skipped_idxs[i - 1]:
-                skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx]
-            else:
-                skipped_tokens.append(tokens[idx])
+    # ------------------------------------------------------------------
+    # Handling for individual tokens. These are kept as methods instead
+    # of functions for the sake of customizability via subclassing.

-        return skipped_tokens
+    def _parsems(self, value):
+        """Parse a I[.F] seconds value into (seconds, microseconds)."""
+        if "." not in value:
+            return int(value), 0
+        else:
+            i, f = value.split(".")
+            return int(i), int(f.ljust(6, "0")[:6])
+
+    def _to_decimal(self, val):
+        try:
+            decimal_value = Decimal(val)
+            # See GH 662, edge case, infinite value should not be converted
+            # via `_to_decimal`
+            if not decimal_value.is_finite():
+                raise ValueError("Converted decimal value is infinite or NaN")
+        except Exception as e:
+            msg = "Could not convert %s to decimal" % val
+            six.raise_from(ValueError(msg), e)
+        else:
+            return decimal_value
+
+    # ------------------------------------------------------------------
+    # Post-Parsing construction of datetime output. These are kept as
+    # methods instead of functions for the sake of customizability via
+    # subclassing.

     def _build_tzinfo(self, tzinfos, tzname, tzoffset):
         if callable(tzinfos):
@@ -1164,6 +1169,9 @@ class parser(object):
             tzinfo = tz.tzstr(tzdata)
         elif isinstance(tzdata, integer_types):
             tzinfo = tz.tzoffset(tzname, tzdata)
+        else:
+            raise TypeError("Offset must be tzinfo subclass, tz string, "
+                            "or int offset.")
         return tzinfo

     def _build_tzaware(self, naive, res, tzinfos):
@@ -1181,10 +1189,10 @@ class parser(object):
         # This is mostly relevant for winter GMT zones parsed in the UK
         if (aware.tzname() != res.tzname and
                 res.tzname in self.info.UTCZONE):
-            aware = aware.replace(tzinfo=tz.tzutc())
+            aware = aware.replace(tzinfo=tz.UTC)

         elif res.tzoffset == 0:
-            aware = naive.replace(tzinfo=tz.tzutc())
+            aware = naive.replace(tzinfo=tz.UTC)

         elif res.tzoffset:
             aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset))
@@ -1239,17 +1247,21 @@ class parser(object):

         return dt

-    def _to_decimal(self, val):
-        try:
-            decimal_value = Decimal(val)
-            # See GH 662, edge case, infinite value should not be converted via `_to_decimal`
-            if not decimal_value.is_finite():
-                raise ValueError("Converted decimal value is infinite or NaN")
-        except Exception as e:
-            msg = "Could not convert %s to decimal" % val
-            six.raise_from(ValueError(msg), e)
-        else:
-            return decimal_value
+    def _recombine_skipped(self, tokens, skipped_idxs):
+        """
+        >>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"]
+        >>> skipped_idxs = [0, 1, 2, 5]
+        >>> _recombine_skipped(tokens, skipped_idxs)
+        ["foo bar", "baz"]
+        """
+        skipped_tokens = []
+        for i, idx in enumerate(sorted(skipped_idxs)):
+            if i > 0 and idx - 1 == skipped_idxs[i - 1]:
+                skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx]
+            else:
+                skipped_tokens.append(tokens[idx])
+
+        return skipped_tokens


 DEFAULTPARSER = parser()
@@ -1341,10 +1353,10 @@ def parse(timestr, parserinfo=None, **kwargs):
         first element being a :class:`datetime.datetime` object, the second
         a tuple containing the fuzzy tokens.

-    :raises ValueError:
-        Raised for invalid or unknown string format, if the provided
-        :class:`tzinfo` is not in a valid format, or if an invalid date
-        would be created.
+    :raises ParserError:
+        Raised for invalid or unknown string formats, if the provided
+        :class:`tzinfo` is not in a valid format, or if an invalid date would
+        be created.

     :raises OverflowError:
         Raised if the parsed date exceeds the largest valid C integer on
@@ -1573,6 +1585,29 @@ DEFAULTTZPARSER = _tzparser()
 def _parsetz(tzstr):
     return DEFAULTTZPARSER.parse(tzstr)


+class ParserError(ValueError):
+    """Exception subclass used for any failure to parse a datetime string.
+
+    This is a subclass of :py:exc:`ValueError`, and should be raised any time
+    earlier versions of ``dateutil`` would have raised ``ValueError``.
+
+    .. versionadded:: 2.8.1
+    """
+    def __str__(self):
+        try:
+            return self.args[0] % self.args[1:]
+        except (TypeError, IndexError):
+            return super(ParserError, self).__str__()
+
+    def __repr__(self):
+        args = ", ".join("'%s'" % arg for arg in self.args)
+        return "%s(%s)" % (self.__class__.__name__, args)
+
+
 class UnknownTimezoneWarning(RuntimeWarning):
-    """Raised when the parser finds a timezone it cannot parse into a tzinfo"""
+    """Raised when the parser finds a timezone it cannot parse into a tzinfo.
+
+    .. versionadded:: 2.7.0
+    """
 # vim:ts=4:sw=4:et
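A minimal sketch of how the new exception surfaces to callers (assuming the vendored dateutil >= 2.8.1); because ParserError subclasses ValueError, existing "except ValueError" handlers keep working:

    from dateutil import parser

    try:
        parser.parse("not a date")
    except parser.ParserError as exc:
        # __repr__ shows the raw args; __str__ interpolates them
        print(repr(exc))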
@@ -88,10 +88,12 @@ class isoparser(object):
         - ``hh``
         - ``hh:mm`` or ``hhmm``
         - ``hh:mm:ss`` or ``hhmmss``
-        - ``hh:mm:ss.sss`` or ``hh:mm:ss.ssssss`` (3-6 sub-second digits)
+        - ``hh:mm:ss.ssssss`` (Up to 6 sub-second digits)

         Midnight is a special case for `hh`, as the standard supports both
-        00:00 and 24:00 as a representation.
+        00:00 and 24:00 as a representation. The decimal separator can be
+        either a dot or a comma.
+

         .. caution::
@@ -137,6 +139,10 @@ class isoparser(object):
         else:
             raise ValueError('String contains unknown ISO components')

+        if len(components) > 3 and components[3] == 24:
+            components[3] = 0
+            return datetime(*components) + timedelta(days=1)
+
         return datetime(*components)

     @_takes_ascii
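A short sketch of the behavior the two hunks above describe (assuming dateutil >= 2.8.0): 24:00 rolls over to midnight of the next day, and a comma is accepted as the decimal separator:

    from dateutil.parser import isoparse

    print(isoparse("2021-06-01T24:00:00"))         # 2021-06-02 00:00:00
    print(isoparse("2021-06-01T12:30:45,123456"))  # 2021-06-01 12:30:45.123456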
@@ -153,7 +159,7 @@ class isoparser(object):
         components, pos = self._parse_isodate(datestr)
         if pos < len(datestr):
             raise ValueError('String contains unknown ISO ' +
-                             'components: {}'.format(datestr))
+                             'components: {!r}'.format(datestr.decode('ascii')))
         return date(*components)

     @_takes_ascii
@@ -167,7 +173,10 @@ class isoparser(object):
         :return:
             Returns a :class:`datetime.time` object
         """
-        return time(*self._parse_isotime(timestr))
+        components = self._parse_isotime(timestr)
+        if components[0] == 24:
+            components[0] = 0
+        return time(*components)

     @_takes_ascii
     def parse_tzstr(self, tzstr, zero_as_utc=True):
@@ -190,10 +199,9 @@ class isoparser(object):
         return self._parse_tzstr(tzstr, zero_as_utc=zero_as_utc)

     # Constants
-    _MICROSECOND_END_REGEX = re.compile(b'[-+Z]+')
     _DATE_SEP = b'-'
     _TIME_SEP = b':'
-    _MICRO_SEP = b'.'
+    _FRACTION_REGEX = re.compile(b'[\\.,]([0-9]+)')

     def _parse_isodate(self, dt_str):
         try:
@@ -325,39 +333,42 @@ class isoparser(object):
         pos = 0
         comp = -1

-        if len(timestr) < 2:
+        if len_str < 2:
             raise ValueError('ISO time too short')

-        has_sep = len_str >= 3 and timestr[2:3] == self._TIME_SEP
+        has_sep = False

         while pos < len_str and comp < 5:
             comp += 1

-            if timestr[pos:pos + 1] in b'-+Z':
+            if timestr[pos:pos + 1] in b'-+Zz':
                 # Detect time zone boundary
                 components[-1] = self._parse_tzstr(timestr[pos:])
                 pos = len_str
                 break

+            if comp == 1 and timestr[pos:pos+1] == self._TIME_SEP:
+                has_sep = True
+                pos += 1
+            elif comp == 2 and has_sep:
+                if timestr[pos:pos+1] != self._TIME_SEP:
+                    raise ValueError('Inconsistent use of colon separator')
+                pos += 1
+
             if comp < 3:
                 # Hour, minute, second
                 components[comp] = int(timestr[pos:pos + 2])
                 pos += 2
-                if (has_sep and pos < len_str and
-                        timestr[pos:pos + 1] == self._TIME_SEP):
-                    pos += 1

             if comp == 3:
-                # Microsecond
-                if timestr[pos:pos + 1] != self._MICRO_SEP:
+                # Fraction of a second
+                frac = self._FRACTION_REGEX.match(timestr[pos:])
+                if not frac:
                     continue

-                pos += 1
-                us_str = self._MICROSECOND_END_REGEX.split(timestr[pos:pos + 6],
-                                                           1)[0]
-
+                us_str = frac.group(1)[:6]  # Truncate to microseconds
                 components[comp] = int(us_str) * 10**(6 - len(us_str))
-                pos += len(us_str)
+                pos += len(frac.group())

         if pos < len_str:
             raise ValueError('Unused components in ISO string')
@@ -366,13 +377,12 @@ class isoparser(object):
         # Standard supports 00:00 and 24:00 as representations of midnight
         if any(component != 0 for component in components[1:4]):
             raise ValueError('Hour may only be 24 at 24:00:00.000')
-        components[0] = 0

         return components

     def _parse_tzstr(self, tzstr, zero_as_utc=True):
-        if tzstr == b'Z':
-            return tz.tzutc()
+        if tzstr == b'Z' or tzstr == b'z':
+            return tz.UTC

         if len(tzstr) not in {3, 5, 6}:
             raise ValueError('Time zone offset must be 1, 3, 5 or 6 characters')
@@ -391,7 +401,7 @@ class isoparser(object):
             minutes = int(tzstr[(4 if tzstr[3:4] == self._TIME_SEP else 3):])

         if zero_as_utc and hours == 0 and minutes == 0:
-            return tz.tzutc()
+            return tz.UTC
         else:
             if minutes > 59:
                 raise ValueError('Invalid minutes in time zone offset')
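A sketch of the tz.UTC singleton behavior introduced above (assuming dateutil >= 2.8.0); lowercase 'z' and zero offsets now both map to the same tz.UTC instance:

    from dateutil import tz
    from dateutil.parser import isoparse

    print(isoparse("2021-06-01T12:00:00z").tzinfo is tz.UTC)       # True
    print(isoparse("2021-06-01T12:00:00+00:00").tzinfo is tz.UTC)  # True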
@@ -17,8 +17,12 @@ __all__ = ["relativedelta", "MO", "TU", "WE", "TH", "FR", "SA", "SU"]

 class relativedelta(object):
     """
-    The relativedelta type is based on the specification of the excellent
-    work done by M.-A. Lemburg in his
+    The relativedelta type is designed to be applied to an existing datetime and
+    can replace specific components of that datetime, or represents an interval
+    of time.
+
+    It is based on the specification of the excellent work done by M.-A. Lemburg
+    in his
     `mx.DateTime <https://www.egenix.com/products/python/mxBase/mxDateTime/>`_ extension.
     However, notice that this type does *NOT* implement the same algorithm as
     his work. Do *NOT* expect it to behave like mx.DateTime's counterpart.
@@ -41,17 +45,19 @@ class relativedelta(object):
     years, months, weeks, days, hours, minutes, seconds, microseconds:
         Relative information, may be negative (argument is plural); adding
         or subtracting a relativedelta with relative information performs
-        the corresponding aritmetic operation on the original datetime value
+        the corresponding arithmetic operation on the original datetime value
         with the information in the relativedelta.

     weekday:
-        One of the weekday instances (MO, TU, etc). These
-        instances may receive a parameter N, specifying the Nth
-        weekday, which could be positive or negative (like MO(+1)
-        or MO(-2). Not specifying it is the same as specifying
-        +1. You can also use an integer, where 0=MO. Notice that
-        if the calculated date is already Monday, for example,
-        using MO(1) or MO(-1) won't change the day.
+        One of the weekday instances (MO, TU, etc) available in the
+        relativedelta module. These instances may receive a parameter N,
+        specifying the Nth weekday, which could be positive or negative
+        (like MO(+1) or MO(-2)). Not specifying it is the same as specifying
+        +1. You can also use an integer, where 0=MO. This argument is always
+        relative e.g. if the calculated date is already Monday, using MO(1)
+        or MO(-1) won't change the day. To effectively make it absolute, use
+        it in combination with the day argument (e.g. day=1, MO(1) for first
+        Monday of the month).

     leapdays:
         Will add given days to the date found, if year is a leap
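A minimal sketch of the "day=1 plus MO(1)" idiom the new docstring recommends for an absolute first-Monday-of-month calculation:

    from datetime import datetime
    from dateutil.relativedelta import relativedelta, MO

    # day=1 anchors to the first of the month; MO(1) then advances to the
    # first Monday, making the otherwise-relative weekday effectively absolute.
    print(datetime(2018, 4, 9) + relativedelta(day=1, weekday=MO(1)))
    # 2018-04-02 00:00:00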
@@ -82,9 +88,12 @@ class relativedelta(object):

     For example

+    >>> from datetime import datetime
+    >>> from dateutil.relativedelta import relativedelta, MO
     >>> dt = datetime(2018, 4, 9, 13, 37, 0)
     >>> delta = relativedelta(hours=25, day=1, weekday=MO(1))
-    datetime(2018, 4, 2, 14, 37, 0)
+    >>> dt + delta
+    datetime.datetime(2018, 4, 2, 14, 37)

     First, the day is set to 1 (the first of the month), then 25 hours
     are added, to get to the 2nd day and 14th hour, finally the
@@ -276,7 +285,7 @@ class relativedelta(object):
         values for the relative attributes.

         >>> relativedelta(days=1.5, hours=2).normalized()
-        relativedelta(days=1, hours=14)
+        relativedelta(days=+1, hours=+14)

         :return:
             Returns a :class:`dateutil.relativedelta.relativedelta` object.
@@ -5,27 +5,27 @@ the recurrence rules documented in the
 `iCalendar RFC <https://tools.ietf.org/html/rfc5545>`_,
 including support for caching of results.
 """
-import itertools
-import datetime
 import calendar
+import datetime
+import heapq
+import itertools
 import re
 import sys
+from functools import wraps
+# For warning about deprecation of until and count
+from warnings import warn
+
+from six import advance_iterator, integer_types
+
+from six.moves import _thread, range
+
+from ._common import weekday as weekdaybase

 try:
     from math import gcd
 except ImportError:
     from fractions import gcd

-from six import advance_iterator, integer_types
-from six.moves import _thread, range
-import heapq
-
-from ._common import weekday as weekdaybase
-from .tz import tzutc, tzlocal
-
-# For warning about deprecation of until and count
-from warnings import warn
-
 __all__ = ["rrule", "rruleset", "rrulestr",
            "YEARLY", "MONTHLY", "WEEKLY", "DAILY",
            "HOURLY", "MINUTELY", "SECONDLY",
@@ -82,6 +82,7 @@ def _invalidates_cache(f):
     Decorator for rruleset methods which may invalidate the
     cached length.
     """
+    @wraps(f)
     def inner_func(self, *args, **kwargs):
         rv = f(self, *args, **kwargs)
         self._invalidate_cache()
@@ -178,7 +179,7 @@ class rrulebase(object):
                 return False
         return False

-    # __len__() introduces a large performance penality.
+    # __len__() introduces a large performance penalty.
     def count(self):
         """ Returns the number of recurrences in this set. It will have go
         trough the whole recurrence, if this hasn't been done before. """
@@ -353,20 +354,26 @@ class rrule(rrulebase):
         from calendar.firstweekday(), and may be modified by
         calendar.setfirstweekday().
     :param count:
-        How many occurrences will be generated.
+        If given, this determines how many occurrences will be generated.

         .. note::
-            As of version 2.5.0, the use of the ``until`` keyword together
-            with the ``count`` keyword is deprecated per RFC-5545 Sec. 3.3.10.
+            As of version 2.5.0, the use of the keyword ``until`` in conjunction
+            with ``count`` is deprecated, to make sure ``dateutil`` is fully
+            compliant with `RFC-5545 Sec. 3.3.10 <https://tools.ietf.org/
+            html/rfc5545#section-3.3.10>`_. Therefore, ``until`` and ``count``
+            **must not** occur in the same call to ``rrule``.
     :param until:
-        If given, this must be a datetime instance, that will specify the
+        If given, this must be a datetime instance specifying the upper-bound
         limit of the recurrence. The last recurrence in the rule is the greatest
         datetime that is less than or equal to the value specified in the
         ``until`` parameter.

         .. note::
-            As of version 2.5.0, the use of the ``until`` keyword together
-            with the ``count`` keyword is deprecated per RFC-5545 Sec. 3.3.10.
+            As of version 2.5.0, the use of the keyword ``until`` in conjunction
+            with ``count`` is deprecated, to make sure ``dateutil`` is fully
+            compliant with `RFC-5545 Sec. 3.3.10 <https://tools.ietf.org/
+            html/rfc5545#section-3.3.10>`_. Therefore, ``until`` and ``count``
+            **must not** occur in the same call to ``rrule``.
     :param bysetpos:
         If given, it must be either an integer, or a sequence of integers,
         positive or negative. Each given integer will specify an occurrence
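A short sketch of the documented constraint (count and until are mutually exclusive per RFC 5545):

    from datetime import datetime
    from dateutil.rrule import rrule, DAILY

    start = datetime(2021, 6, 1, 9, 0)
    # Use either count or until -- passing both in one call is deprecated.
    print(list(rrule(DAILY, dtstart=start, count=3)))
    print(list(rrule(DAILY, dtstart=start, until=datetime(2021, 6, 3, 9, 0))))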
@@ -429,7 +436,7 @@ class rrule(rrulebase):
         if not dtstart:
             if until and until.tzinfo:
                 dtstart = datetime.datetime.now(tz=until.tzinfo).replace(microsecond=0)
-        else:
+            else:
                 dtstart = datetime.datetime.now().replace(microsecond=0)
         elif not isinstance(dtstart, datetime.datetime):
             dtstart = datetime.datetime.fromordinal(dtstart.toordinal())
@@ -1406,7 +1413,52 @@ class rruleset(rrulebase):
         self._len = total



 class _rrulestr(object):
+    """ Parses a string representation of a recurrence rule or set of
+    recurrence rules.
+
+    :param s:
+        Required, a string defining one or more recurrence rules.
+
+    :param dtstart:
+        If given, used as the default recurrence start if not specified in the
+        rule string.
+
+    :param cache:
+        If set ``True`` caching of results will be enabled, improving
+        performance of multiple queries considerably.
+
+    :param unfold:
+        If set ``True`` indicates that a rule string is split over more
+        than one line and should be joined before processing.
+
+    :param forceset:
+        If set ``True`` forces a :class:`dateutil.rrule.rruleset` to
+        be returned.
+
+    :param compatible:
+        If set ``True`` forces ``unfold`` and ``forceset`` to be ``True``.
+
+    :param ignoretz:
+        If set ``True``, time zones in parsed strings are ignored and a naive
+        :class:`datetime.datetime` object is returned.
+
+    :param tzids:
+        If given, a callable or mapping used to retrieve a
+        :class:`datetime.tzinfo` from a string representation.
+        Defaults to :func:`dateutil.tz.gettz`.
+
+    :param tzinfos:
+        Additional time zone names / aliases which may be present in a string
+        representation. See :func:`dateutil.parser.parse` for more
+        information.
+
+    :return:
+        Returns a :class:`dateutil.rrule.rruleset` or
+        :class:`dateutil.rrule.rrule`
+    """
+
     _freq_map = {"YEARLY": YEARLY,
                  "MONTHLY": MONTHLY,
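A minimal usage sketch for the parser this docstring now documents:

    from dateutil.rrule import rrulestr

    rule = rrulestr("DTSTART:20210601T090000\nRRULE:FREQ=DAILY;COUNT=3")
    print(list(rule))  # three datetimes, one per day starting 2021-06-01 09:00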
@@ -1508,6 +1560,58 @@ class _rrulestr(object):
                 raise ValueError("invalid '%s': %s" % (name, value))
         return rrule(dtstart=dtstart, cache=cache, **rrkwargs)

+    def _parse_date_value(self, date_value, parms, rule_tzids,
+                          ignoretz, tzids, tzinfos):
+        global parser
+        if not parser:
+            from dateutil import parser
+
+        datevals = []
+        value_found = False
+        TZID = None
+
+        for parm in parms:
+            if parm.startswith("TZID="):
+                try:
+                    tzkey = rule_tzids[parm.split('TZID=')[-1]]
+                except KeyError:
+                    continue
+                if tzids is None:
+                    from . import tz
+                    tzlookup = tz.gettz
+                elif callable(tzids):
+                    tzlookup = tzids
+                else:
+                    tzlookup = getattr(tzids, 'get', None)
+                    if tzlookup is None:
+                        msg = ('tzids must be a callable, mapping, or None, '
+                               'not %s' % tzids)
+                        raise ValueError(msg)
+
+                TZID = tzlookup(tzkey)
+                continue
+
+            # RFC 5445 3.8.2.4: The VALUE parameter is optional, but may be found
+            # only once.
+            if parm not in {"VALUE=DATE-TIME", "VALUE=DATE"}:
+                raise ValueError("unsupported parm: " + parm)
+            else:
+                if value_found:
+                    msg = ("Duplicate value parameter found in: " + parm)
+                    raise ValueError(msg)
+                value_found = True
+
+        for datestr in date_value.split(','):
+            date = parser.parse(datestr, ignoretz=ignoretz, tzinfos=tzinfos)
+            if TZID is not None:
+                if date.tzinfo is None:
+                    date = date.replace(tzinfo=TZID)
+                else:
+                    raise ValueError('DTSTART/EXDATE specifies multiple timezone')
+            datevals.append(date)
+
+        return datevals
+
     def _parse_rfc(self, s,
                    dtstart=None,
                    cache=False,
@@ -1580,54 +1684,18 @@ class _rrulestr(object):
                             raise ValueError("unsupported EXRULE parm: "+parm)
                     exrulevals.append(value)
                 elif name == "EXDATE":
-                    for parm in parms:
-                        if parm != "VALUE=DATE-TIME":
-                            raise ValueError("unsupported EXDATE parm: "+parm)
-                    exdatevals.append(value)
+                    exdatevals.extend(
+                        self._parse_date_value(value, parms,
+                                               TZID_NAMES, ignoretz,
+                                               tzids, tzinfos)
+                    )
                 elif name == "DTSTART":
-                    # RFC 5445 3.8.2.4: The VALUE parameter is optional, but
-                    # may be found only once.
-                    value_found = False
-                    TZID = None
-                    valid_values = {"VALUE=DATE-TIME", "VALUE=DATE"}
-                    for parm in parms:
-                        if parm.startswith("TZID="):
-                            try:
-                                tzkey = TZID_NAMES[parm.split('TZID=')[-1]]
-                            except KeyError:
-                                continue
-                            if tzids is None:
-                                from . import tz
-                                tzlookup = tz.gettz
-                            elif callable(tzids):
-                                tzlookup = tzids
-                            else:
-                                tzlookup = getattr(tzids, 'get', None)
-                                if tzlookup is None:
-                                    msg = ('tzids must be a callable, ' +
-                                           'mapping, or None, ' +
-                                           'not %s' % tzids)
-                                    raise ValueError(msg)
-
-                            TZID = tzlookup(tzkey)
-                            continue
-                        if parm not in valid_values:
-                            raise ValueError("unsupported DTSTART parm: "+parm)
-                        else:
-                            if value_found:
-                                msg = ("Duplicate value parameter found in " +
-                                       "DTSTART: " + parm)
-                                raise ValueError(msg)
-                            value_found = True
-                    if not parser:
-                        from dateutil import parser
-                    dtstart = parser.parse(value, ignoretz=ignoretz,
-                                           tzinfos=tzinfos)
-                    if TZID is not None:
-                        if dtstart.tzinfo is None:
-                            dtstart = dtstart.replace(tzinfo=TZID)
-                        else:
-                            raise ValueError('DTSTART specifies multiple timezones')
+                    dtvals = self._parse_date_value(value, parms, TZID_NAMES,
+                                                    ignoretz, tzids, tzinfos)
+                    if len(dtvals) != 1:
+                        raise ValueError("Multiple DTSTART values specified:" +
+                                         value)
+                    dtstart = dtvals[0]
                 else:
                     raise ValueError("unsupported property: "+name)
             if (forceset or len(rrulevals) > 1 or rdatevals
@@ -1649,10 +1717,7 @@ class _rrulestr(object):
                                              ignoretz=ignoretz,
                                              tzinfos=tzinfos))
             for value in exdatevals:
-                for datestr in value.split(','):
-                    rset.exdate(parser.parse(datestr,
-                                             ignoretz=ignoretz,
-                                             tzinfos=tzinfos))
+                rset.exdate(value)
             if compatible and dtstart:
                 rset.rdate(dtstart)
             return rset
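A sketch of what the refactored EXDATE path enables (assuming dateutil >= 2.8.1): EXDATE values, including TZID-qualified ones, are now parsed by the shared _parse_date_value helper and handed to rruleset.exdate() directly:

    from dateutil.rrule import rrulestr

    rset = rrulestr(
        "DTSTART;TZID=America/New_York:19970902T090000\n"
        "RRULE:FREQ=DAILY;COUNT=3\n"
        "EXDATE;TZID=America/New_York:19970903T090000"
    )
    print(list(rset))  # two occurrences; Sept 3 is excluded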
@@ -2,11 +2,6 @@
 from .tz import *
 from .tz import __doc__

-#: Convenience constant providing a :class:`tzutc()` instance
-#:
-#: .. versionadded:: 2.7.0
-UTC = tzutc()
-
 __all__ = ["tzutc", "tzoffset", "tzlocal", "tzfile", "tzrange",
            "tzstr", "tzical", "tzwin", "tzwinlocal", "gettz",
            "enfold", "datetime_ambiguous", "datetime_exists",
@@ -1,4 +1,4 @@
-from six import PY3
+from six import PY2

 from functools import wraps
@@ -16,14 +16,18 @@ def tzname_in_python2(namefunc):
     tzname() API changed in Python 3. It used to return bytes, but was changed
     to unicode strings
     """
-    def adjust_encoding(*args, **kwargs):
-        name = namefunc(*args, **kwargs)
-        if name is not None and not PY3:
-            name = name.encode()
+    if PY2:
+        @wraps(namefunc)
+        def adjust_encoding(*args, **kwargs):
+            name = namefunc(*args, **kwargs)
+            if name is not None:
+                name = name.encode()

-        return name
+            return name

-    return adjust_encoding
+        return adjust_encoding
+    else:
+        return namefunc


 # The following is adapted from Alexander Belopolsky's tz library
@@ -208,7 +212,7 @@ class _tzinfo(tzinfo):
         Since this is the one time that we *know* we have an unambiguous
         datetime object, we take this opportunity to determine whether the
         datetime is ambiguous and in a "fold" state (e.g. if it's the first
-        occurence, chronologically, of the ambiguous datetime).
+        occurrence, chronologically, of the ambiguous datetime).

         :param dt:
             A timezone-aware :class:`datetime.datetime` object.
@@ -246,7 +250,7 @@ class _tzinfo(tzinfo):
         Since this is the one time that we *know* we have an unambiguous
         datetime object, we take this opportunity to determine whether the
         datetime is ambiguous and in a "fold" state (e.g. if it's the first
-        occurance, chronologically, of the ambiguous datetime).
+        occurrence, chronologically, of the ambiguous datetime).

         :param dt:
             A timezone-aware :class:`datetime.datetime` object.
@@ -1,4 +1,8 @@
 from datetime import timedelta
+import weakref
+from collections import OrderedDict
+
+from six.moves import _thread


 class _TzSingleton(type):
@@ -11,6 +15,7 @@ class _TzSingleton(type):
             cls.__instance = super(_TzSingleton, cls).__call__()
         return cls.__instance

+
 class _TzFactory(type):
     def instance(cls, *args, **kwargs):
         """Alternate constructor that returns a fresh instance"""
@@ -19,7 +24,11 @@ class _TzFactory(type):

 class _TzOffsetFactory(_TzFactory):
     def __init__(cls, *args, **kwargs):
-        cls.__instances = {}
+        cls.__instances = weakref.WeakValueDictionary()
+        cls.__strong_cache = OrderedDict()
+        cls.__strong_cache_size = 8
+
+        cls._cache_lock = _thread.allocate_lock()

     def __call__(cls, name, offset):
         if isinstance(offset, timedelta):
@@ -31,12 +40,25 @@ class _TzOffsetFactory(_TzFactory):
         if instance is None:
             instance = cls.__instances.setdefault(key,
                                                   cls.instance(name, offset))

+        # This lock may not be necessary in Python 3. See GH issue #901
+        with cls._cache_lock:
+            cls.__strong_cache[key] = cls.__strong_cache.pop(key, instance)
+
+            # Remove an item if the strong cache is overpopulated
+            if len(cls.__strong_cache) > cls.__strong_cache_size:
+                cls.__strong_cache.popitem(last=False)
+
         return instance


 class _TzStrFactory(_TzFactory):
     def __init__(cls, *args, **kwargs):
-        cls.__instances = {}
+        cls.__instances = weakref.WeakValueDictionary()
+        cls.__strong_cache = OrderedDict()
+        cls.__strong_cache_size = 8
+
+        cls.__cache_lock = _thread.allocate_lock()

     def __call__(cls, s, posix_offset=False):
         key = (s, posix_offset)
@@ -45,5 +67,14 @@ class _TzStrFactory(_TzFactory):
         if instance is None:
             instance = cls.__instances.setdefault(key,
                                                   cls.instance(s, posix_offset))

+        # This lock may not be necessary in Python 3. See GH issue #901
+        with cls.__cache_lock:
+            cls.__strong_cache[key] = cls.__strong_cache.pop(key, instance)
+
+            # Remove an item if the strong cache is overpopulated
+            if len(cls.__strong_cache) > cls.__strong_cache_size:
+                cls.__strong_cache.popitem(last=False)
+
         return instance
@@ -13,6 +13,8 @@ import time
 import sys
 import os
 import bisect
+import weakref
+from collections import OrderedDict

 import six
 from six import string_types
@@ -28,6 +30,9 @@ try:
 except ImportError:
     tzwin = tzwinlocal = None

+# For warning about rounding tzinfo
+from warnings import warn
+
 ZERO = datetime.timedelta(0)
 EPOCH = datetime.datetime.utcfromtimestamp(0)
 EPOCHORDINAL = EPOCH.toordinal()
@@ -118,6 +123,12 @@ class tzutc(datetime.tzinfo):
     __reduce__ = object.__reduce__


+#: Convenience constant providing a :class:`tzutc()` instance
+#:
+#: .. versionadded:: 2.7.0
+UTC = tzutc()
+
+
 @six.add_metaclass(_TzOffsetFactory)
 class tzoffset(datetime.tzinfo):
     """
@@ -137,7 +148,8 @@ class tzoffset(datetime.tzinfo):
             offset = offset.total_seconds()
         except (TypeError, AttributeError):
             pass
-        self._offset = datetime.timedelta(seconds=offset)
+
+        self._offset = datetime.timedelta(seconds=_get_supported_offset(offset))

     def utcoffset(self, dt):
         return self._offset
@@ -373,7 +385,7 @@ class _tzfile(object):

 class tzfile(_tzinfo):
     """
-    This is a ``tzinfo`` subclass thant allows one to use the ``tzfile(5)``
+    This is a ``tzinfo`` subclass that allows one to use the ``tzfile(5)``
     format timezone files to extract current and historical zone information.

     :param fileobj:
@@ -460,7 +472,7 @@ class tzfile(_tzinfo):

         if fileobj is not None:
             if not file_opened_here:
-                fileobj = _ContextWrapper(fileobj)
+                fileobj = _nullcontext(fileobj)

             with fileobj as file_stream:
                 tzobj = self._read_tzfile(file_stream)
@@ -600,10 +612,7 @@ class tzfile(_tzinfo):
         out.ttinfo_list = []
         for i in range(typecnt):
             gmtoff, isdst, abbrind = ttinfo[i]
-            # Round to full-minutes if that's not the case. Python's
-            # datetime doesn't accept sub-minute timezones. Check
-            # http://python.org/sf/1447945 for some information.
-            gmtoff = 60 * ((gmtoff + 30) // 60)
+            gmtoff = _get_supported_offset(gmtoff)
             tti = _ttinfo()
             tti.offset = gmtoff
             tti.dstoffset = datetime.timedelta(0)
@@ -655,37 +664,44 @@ class tzfile(_tzinfo):
         # isgmt are off, so it should be in wall time. OTOH, it's
         # always in gmt time. Let me know if you have comments
         # about this.
-        laststdoffset = None
+        lastdst = None
+        lastoffset = None
+        lastdstoffset = None
+        lastbaseoffset = None
         out.trans_list = []

         for i, tti in enumerate(out.trans_idx):
-            if not tti.isdst:
-                offset = tti.offset
-                laststdoffset = offset
-            else:
-                if laststdoffset is not None:
-                    # Store the DST offset as well and update it in the list
-                    tti.dstoffset = tti.offset - laststdoffset
-                    out.trans_idx[i] = tti
+            offset = tti.offset
+            dstoffset = 0

-                offset = laststdoffset or 0
+            if lastdst is not None:
+                if tti.isdst:
+                    if not lastdst:
+                        dstoffset = offset - lastoffset

-            out.trans_list.append(out.trans_list_utc[i] + offset)
+                    if not dstoffset and lastdstoffset:
+                        dstoffset = lastdstoffset

-        # In case we missed any DST offsets on the way in for some reason, make
-        # a second pass over the list, looking for the /next/ DST offset.
-        laststdoffset = None
-        for i in reversed(range(len(out.trans_idx))):
-            tti = out.trans_idx[i]
-            if tti.isdst:
-                if not (tti.dstoffset or laststdoffset is None):
-                    tti.dstoffset = tti.offset - laststdoffset
-            else:
-                laststdoffset = tti.offset
+                    tti.dstoffset = datetime.timedelta(seconds=dstoffset)
+                    lastdstoffset = dstoffset

-            if not isinstance(tti.dstoffset, datetime.timedelta):
-                tti.dstoffset = datetime.timedelta(seconds=tti.dstoffset)
+            # If a time zone changes its base offset during a DST transition,
+            # then you need to adjust by the previous base offset to get the
+            # transition time in local time. Otherwise you use the current
+            # base offset. Ideally, I would have some mathematical proof of
+            # why this is true, but I haven't really thought about it enough.
+            baseoffset = offset - dstoffset
+            adjustment = baseoffset
+            if (lastbaseoffset is not None and baseoffset != lastbaseoffset
+                    and tti.isdst != lastdst):
+                # The base DST has changed
+                adjustment = lastbaseoffset

             out.trans_idx[i] = tti
+            lastdst = tti.isdst
+            lastoffset = offset
+            lastbaseoffset = baseoffset
+
+            out.trans_list.append(out.trans_list_utc[i] + adjustment)

         out.trans_idx = tuple(out.trans_idx)
         out.trans_list = tuple(out.trans_list)
@@ -1255,7 +1271,7 @@ class tzical(object):
             fileobj = open(fileobj, 'r')
         else:
             self._s = getattr(fileobj, 'name', repr(fileobj))
-            fileobj = _ContextWrapper(fileobj)
+            fileobj = _nullcontext(fileobj)

         self._vtz = {}
@@ -1528,7 +1544,9 @@ def __get_gettz():
         """
         def __init__(self):

-            self.__instances = {}
+            self.__instances = weakref.WeakValueDictionary()
+            self.__strong_cache_size = 8
+            self.__strong_cache = OrderedDict()
             self._cache_lock = _thread.allocate_lock()

         def __call__(self, name=None):
@@ -1537,17 +1555,37 @@ def __get_gettz():

             if rv is None:
                 rv = self.nocache(name=name)
-                if not (name is None or isinstance(rv, tzlocal_classes)):
+                if not (name is None
+                        or isinstance(rv, tzlocal_classes)
+                        or rv is None):
                     # tzlocal is slightly more complicated than the other
                     # time zone providers because it depends on environment
                     # at construction time, so don't cache that.
+                    #
+                    # We also cannot store weak references to None, so we
+                    # will also not store that.
                     self.__instances[name] = rv
+                else:
+                    # No need for strong caching, return immediately
+                    return rv
+
+            self.__strong_cache[name] = self.__strong_cache.pop(name, rv)
+
+            if len(self.__strong_cache) > self.__strong_cache_size:
+                self.__strong_cache.popitem(last=False)

             return rv

+        def set_cache_size(self, size):
+            with self._cache_lock:
+                self.__strong_cache_size = size
+                while len(self.__strong_cache) > size:
+                    self.__strong_cache.popitem(last=False)
+
         def cache_clear(self):
+            with self._cache_lock:
-            self.__instances = {}
+                self.__instances = weakref.WeakValueDictionary()
+                self.__strong_cache.clear()

         @staticmethod
         def nocache(name=None):
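A short sketch of the caching behavior added here (assuming dateutil >= 2.8.0): repeated gettz() calls return the cached instance, and the cache is now managed through weak references plus a small strong-reference LRU:

    from dateutil import tz

    eastern = tz.gettz("America/New_York")
    print(tz.gettz("America/New_York") is eastern)  # True -- served from cache

    tz.gettz.cache_clear()  # drop all cached instances
    print(tz.gettz("America/New_York") is eastern)  # False -- rebuilt after clearing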
@@ -1558,7 +1596,7 @@ def __get_gettz():
                 name = os.environ["TZ"]
             except KeyError:
                 pass
-        if name is None or name == ":":
+        if name is None or name in ("", ":"):
             for filepath in TZFILES:
                 if not os.path.isabs(filepath):
                     filename = filepath
@@ -1577,8 +1615,15 @@ def __get_gettz():
             else:
                 tz = tzlocal()
         else:
-            if name.startswith(":"):
-                name = name[1:]
+            try:
+                if name.startswith(":"):
+                    name = name[1:]
+            except TypeError as e:
+                if isinstance(name, bytes):
+                    new_msg = "gettz argument should be str, not bytes"
+                    six.raise_from(TypeError(new_msg), e)
+                else:
+                    raise
             if os.path.isabs(name):
                 if os.path.isfile(name):
                     tz = tzfile(name)
@@ -1601,7 +1646,8 @@ def __get_gettz():
                 if tzwin is not None:
                     try:
                         tz = tzwin(name)
-                    except WindowsError:
+                    except (WindowsError, UnicodeEncodeError):
+                        # UnicodeEncodeError is for Python 2.7 compat
                         tz = None

                 if not tz:
@@ -1622,7 +1668,7 @@ def __get_gettz():
                         break
             else:
                 if name in ("GMT", "UTC"):
-                    tz = tzutc()
+                    tz = UTC
                 elif name in time.tzname:
                     tz = tzlocal()
         return tz
@@ -1662,7 +1708,7 @@ def datetime_exists(dt, tz=None):

     # This is essentially a test of whether or not the datetime can survive
     # a round trip to UTC.
-    dt_rt = dt.replace(tzinfo=tz).astimezone(tzutc()).astimezone(tz)
+    dt_rt = dt.replace(tzinfo=tz).astimezone(UTC).astimezone(tz)
     dt_rt = dt_rt.replace(tzinfo=None)

     return dt == dt_rt
@@ -1768,18 +1814,36 @@ def _datetime_to_timestamp(dt):
     return (dt.replace(tzinfo=None) - EPOCH).total_seconds()


-class _ContextWrapper(object):
-    """
-    Class for wrapping contexts so that they are passed through in a
-    with statement.
-    """
-    def __init__(self, context):
-        self.context = context
+if sys.version_info >= (3, 6):
+    def _get_supported_offset(second_offset):
+        return second_offset
+else:
+    def _get_supported_offset(second_offset):
+        # For python pre-3.6, round to full-minutes if that's not the case.
+        # Python's datetime doesn't accept sub-minute timezones. Check
+        # http://python.org/sf/1447945 or https://bugs.python.org/issue5288
+        # for some information.
+        old_offset = second_offset
+        calculated_offset = 60 * ((second_offset + 30) // 60)
+        return calculated_offset

-    def __enter__(self):
-        return self.context

-    def __exit__(*args, **kwargs):
-        pass
+try:
+    # Python 3.7 feature
+    from contextlib import nullcontext as _nullcontext
+except ImportError:
+    class _nullcontext(object):
+        """
+        Class for wrapping contexts so that they are passed through in a
+        with statement.
+        """
+        def __init__(self, context):
+            self.context = context
+
+        def __enter__(self):
+            return self.context
+
+        def __exit__(*args, **kwargs):
+            pass

 # vim:ts=4:sw=4:et
@@ -1,3 +1,11 @@
 # -*- coding: utf-8 -*-
+"""
+This module provides an interface to the native time zone data on Windows,
+including :py:class:`datetime.tzinfo` implementations.
+
+Attempting to import this module on a non-Windows platform will raise an
+:py:obj:`ImportError`.
+"""
 # This code was originally contributed by Jeffrey Harris.
 import datetime
 import struct
@@ -39,7 +47,7 @@ TZKEYNAME = _settzkeyname()

 class tzres(object):
     """
-    Class for accessing `tzres.dll`, which contains timezone name related
+    Class for accessing ``tzres.dll``, which contains timezone name related
     resources.

     .. versionadded:: 2.5.0

@@ -72,9 +80,10 @@ class tzres(object):
         :param offset:
             A positive integer value referring to a string from the tzres dll.

-        ..note:
+        .. note::
+
             Offsets found in the registry are generally of the form
-            `@tzres.dll,-114`. The offset in this case if 114, not -114.
+            ``@tzres.dll,-114``. The offset in this case is 114, not -114.

         """
         resource = self.p_wchar()
@@ -146,6 +155,9 @@ class tzwinbase(tzrangebase):
         return result

     def display(self):
+        """
+        Return the display name of the time zone.
+        """
         return self._display

     def transitions(self, year):
@@ -188,6 +200,17 @@ class tzwinbase(tzrangebase):


 class tzwin(tzwinbase):
+    """
+    Time zone object created from the zone info in the Windows registry
+
+    These are similar to :py:class:`dateutil.tz.tzrange` objects in that
+    the time zone data is provided in the format of a single offset rule
+    for either 0 or 2 time zone transitions per year.
+
+    :param: name
+        The name of a Windows time zone key, e.g. "Eastern Standard Time".
+        The full list of keys can be retrieved with :func:`tzwin.list`.
+    """

     def __init__(self, name):
         self._name = name
@@ -234,6 +257,22 @@ class tzwin(tzwinbase):


 class tzwinlocal(tzwinbase):
+    """
+    Class representing the local time zone information in the Windows registry
+
+    While :class:`dateutil.tz.tzlocal` makes system calls (via the :mod:`time`
+    module) to retrieve time zone information, ``tzwinlocal`` retrieves the
+    rules directly from the Windows registry and creates an object like
+    :class:`dateutil.tz.tzwin`.
+
+    Because Windows does not have an equivalent of :func:`time.tzset`, on
+    Windows, :class:`dateutil.tz.tzlocal` instances will always reflect the
+    time zone settings *at the time that the process was started*, meaning
+    changes to the machine's time zone settings during the run of a program
+    on Windows will **not** be reflected by :class:`dateutil.tz.tzlocal`.
+    Because ``tzwinlocal`` reads the registry directly, it is unaffected by
+    this issue.
+    """
     def __init__(self):
         with winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE) as handle:
             with winreg.OpenKey(handle, TZLOCALKEYNAME) as tzlocalkey:
@@ -28,7 +28,7 @@ def today(tzinfo=None):

 def default_tzinfo(dt, tzinfo):
     """
-    Sets the the ``tzinfo`` parameter on naive datetimes only
+    Sets the ``tzinfo`` parameter on naive datetimes only

     This is useful for example when you are provided a datetime that may have
     either an implicit or explicit time zone, such as when parsing a time zone
@@ -63,7 +63,7 @@ def default_tzinfo(dt, tzinfo):

 def within_delta(dt1, dt2, delta):
     """
-    Useful for comparing two datetimes that may a negilible difference
+    Useful for comparing two datetimes that may have a negligible difference
     to be considered equal.
     """
     delta = abs(delta)
Binary file not shown.
@@ -3,7 +3,7 @@ import os
 import tempfile
 import shutil
 import json
-from subprocess import check_call
+from subprocess import check_call, check_output
 from tarfile import TarFile

 from dateutil.zoneinfo import METADATA_FN, ZONEFILENAME
@@ -23,11 +23,9 @@ def rebuild(filename, tag=None, format="gz", zonegroups=[], metadata=None):
         for name in zonegroups:
             tf.extract(name, tmpdir)
         filepaths = [os.path.join(tmpdir, n) for n in zonegroups]
-        try:
-            check_call(["zic", "-d", zonedir] + filepaths)
-        except OSError as e:
-            _print_on_nosuchfile(e)
-            raise
+
+        _run_zic(zonedir, filepaths)

         # write metadata file
         with open(os.path.join(zonedir, METADATA_FN), 'w') as f:
             json.dump(metadata, f, indent=4, sort_keys=True)
@@ -40,6 +38,30 @@ def rebuild(filename, tag=None, format="gz", zonegroups=[], metadata=None):
         shutil.rmtree(tmpdir)


+def _run_zic(zonedir, filepaths):
+    """Calls the ``zic`` compiler in a compatible way to get a "fat" binary.
+
+    Recent versions of ``zic`` default to ``-b slim``, while older versions
+    don't even have the ``-b`` option (but default to "fat" binaries). The
+    current version of dateutil does not support Version 2+ TZif files, which
+    causes problems when used in conjunction with "slim" binaries, so this
+    function is used to ensure that we always get a "fat" binary.
+    """
+
+    try:
+        help_text = check_output(["zic", "--help"])
+    except OSError as e:
+        _print_on_nosuchfile(e)
+        raise
+
+    if b"-b " in help_text:
+        bloat_args = ["-b", "fat"]
+    else:
+        bloat_args = []
+
+    check_call(["zic"] + bloat_args + ["-d", zonedir] + filepaths)
+
+
 def _print_on_nosuchfile(e):
     """Print helpful troubleshooting message
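The detection trick used above can be reproduced standalone; a sketch (assumes a zic binary on PATH):

    from subprocess import check_output

    help_text = check_output(["zic", "--help"])
    # Newer zic exposes '-b {slim,fat}'; only then is '-b fat' passed explicitly.
    print(b"-b " in help_text)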
@@ -142,7 +142,7 @@ def main(args=None):  # pylint:disable=too-many-branches

     if options.get('yaml'):
         try:
-            import yaml  # pylint:disable=unused-variable
+            import yaml  # pylint:disable=unused-variable,unused-import
         except ImportError:  # pragma: no cover
             del options['yaml']
             print('PyYAML is not installed. \'--yaml\' option will be ignored ...', file=sys.stderr)
@@ -4,4 +4,4 @@
 Version module
 """
 # pragma: no cover
-__version__ = '3.0.3'
+__version__ = '3.1.1'
@@ -82,6 +82,19 @@ def properties(options=None):
     return default_api.properties(options)


+def suggested_expected(titles, options=None):
+    """
+    Return a list of suggested titles to be used as `expected_title` based on the list of titles
+    :param titles: the filename or release name
+    :type titles: list|set|dict
+    :param options:
+    :type options: str|dict
+    :return:
+    :rtype: list of str
+    """
+    return default_api.suggested_expected(titles, options)
+
+
 class GuessItApi(object):
     """
     An api class that can be configured with custom Rebulk configuration.
@@ -228,5 +241,23 @@ class GuessItApi(object):
             ordered = self.rebulk.customize_properties(ordered)
         return ordered

+    def suggested_expected(self, titles, options=None):
+        """
+        Return a list of suggested titles to be used as `expected_title` based on the list of titles
+        :param titles: the filename or release name
+        :type titles: list|set|dict
+        :param options:
+        :type options: str|dict
+        :return:
+        :rtype: list of str
+        """
+        suggested = []
+        for title in titles:
+            guess = self.guessit(title, options)
+            if len(guess) != 2 or 'title' not in guess:
+                suggested.append(title)
+
+        return suggested
+

 default_api = GuessItApi()
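A minimal sketch of the new API (assuming the vendored guessit >= 3.1): titles that do not parse cleanly into a single title are returned as candidates for the expected_title option:

    from guessit import guessit, suggested_expected

    titles = ["This is Us", "OSS 117"]
    expected = suggested_expected(titles)
    print(expected)
    print(guessit("This.is.Us.S01E01.720p.mkv", {"expected_title": expected}))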
@@ -4,7 +4,7 @@
 Backports
 """
 # pragma: no-cover
-# pylint: disabled
+# pylint: skip-file

 def cmp_to_key(mycmp):
     """functools.cmp_to_key backport"""
@@ -1,18 +1,19 @@
 {
   "expected_title": [
-    "OSS 117"
+    "OSS 117",
+    "This is Us"
   ],
   "allowed_countries": [
     "au",
-    "us",
-    "gb"
+    "gb",
+    "us"
   ],
   "allowed_languages": [
+    "ca",
+    "cs",
     "de",
     "en",
     "es",
-    "ca",
-    "cs",
     "fr",
     "he",
     "hi",
@@ -20,7 +21,9 @@
     "it",
     "ja",
     "ko",
+    "mul",
     "nl",
+    "no",
     "pl",
     "pt",
     "ro",
@@ -28,18 +31,50 @@
     "sv",
     "te",
     "uk",
-    "mul",
     "und"
   ],
   "advanced_config": {
     "common_words": [
-      "ca",
-      "cat",
       "de",
-      "it"
+      "he",
+      "it",
+      "no",
+      "por",
+      "rum",
+      "se",
+      "st",
+      "sub"
     ],
+    "groups": {
+      "starting": "([{",
+      "ending": ")]}"
+    },
+    "audio_codec": {
+      "audio_channels": {
+        "1.0": [
+          "1ch",
+          "mono"
+        ],
+        "2.0": [
+          "2ch",
+          "stereo",
+          "re:(2[\\W_]0(?:ch)?)(?=[^\\d]|$)"
+        ],
+        "5.1": [
+          "5ch",
+          "6ch",
+          "re:(5[\\W_][01](?:ch)?)(?=[^\\d]|$)",
+          "re:(6[\\W_]0(?:ch)?)(?=[^\\d]|$)"
+        ],
+        "7.1": [
+          "7ch",
+          "8ch",
+          "re:(7[\\W_][01](?:ch)?)(?=[^\\d]|$)"
+        ]
+      }
+    },
     "container": {
       "subtitles": [
         "srt",
@ -59,9 +94,10 @@
|
|||
"avi",
|
||||
"divx",
|
||||
"flv",
|
||||
"mk3d",
|
||||
"iso",
|
||||
"m4v",
|
||||
"mk2",
|
||||
"mk3d",
|
||||
"mka",
|
||||
"mkv",
|
||||
"mov",
|
||||
|
@ -77,12 +113,11 @@
|
|||
"ram",
|
||||
"rm",
|
||||
"ts",
|
||||
"vob",
|
||||
"wav",
|
||||
"webm",
|
||||
"wma",
|
||||
"wmv",
|
||||
"iso",
|
||||
"vob"
|
||||
"wmv"
|
||||
],
|
||||
"torrent": [
|
||||
"torrent"
|
||||
|
@ -255,7 +290,6 @@
|
|||
],
|
||||
"subtitle_prefixes": [
|
||||
"st",
|
||||
"v",
|
||||
"vost",
|
||||
"subforced",
|
||||
"fansub",
|
||||
|
@ -297,12 +331,12 @@
|
|||
},
|
||||
"release_group": {
|
||||
"forbidden_names": [
|
||||
"rip",
|
||||
"bonus",
|
||||
"by",
|
||||
"for",
|
||||
"par",
|
||||
"pour",
|
||||
"bonus"
|
||||
"rip"
|
||||
],
|
||||
"ignored_seps": "[]{}()"
|
||||
},
|
||||
|
@ -311,6 +345,7 @@
|
|||
"23.976",
|
||||
"24",
|
||||
"25",
|
||||
"29.970",
|
||||
"30",
|
||||
"48",
|
||||
"50",
|
||||
|
@ -329,6 +364,7 @@
|
|||
"progressive": [
|
||||
"360",
|
||||
"480",
|
||||
"540",
|
||||
"576",
|
||||
"900",
|
||||
"1080",
|
||||
|
@ -342,8 +378,8 @@
|
|||
"website": {
|
||||
"safe_tlds": [
|
||||
"com",
|
||||
"org",
|
||||
"net"
|
||||
"net",
|
||||
"org"
|
||||
],
|
||||
"safe_subdomains": [
|
||||
"www"
|
||||
|
@ -351,12 +387,200 @@
      "safe_prefixes": [
        "co",
        "com",
        "org",
        "net"
        "net",
        "org"
      ],
      "prefixes": [
        "from"
      ]
    },
    "streaming_service": {
      "A&E": [
        "AE",
        "A&E"
      ],
      "ABC": "AMBC",
      "ABC Australia": "AUBC",
      "Al Jazeera English": "AJAZ",
      "AMC": "AMC",
      "Amazon Prime": [
        "AMZN",
        "Amazon",
        "re:Amazon-?Prime"
      ],
      "Adult Swim": [
        "AS",
        "re:Adult-?Swim"
      ],
      "America's Test Kitchen": "ATK",
      "Animal Planet": "ANPL",
      "AnimeLab": "ANLB",
      "AOL": "AOL",
      "ARD": "ARD",
      "BBC iPlayer": [
        "iP",
        "re:BBC-?iPlayer"
      ],
      "BravoTV": "BRAV",
      "Canal+": "CNLP",
      "Cartoon Network": "CN",
      "CBC": "CBC",
      "CBS": "CBS",
      "CNBC": "CNBC",
      "Comedy Central": [
        "CC",
        "re:Comedy-?Central"
      ],
      "Channel 4": "4OD",
      "CHRGD": "CHGD",
      "Cinemax": "CMAX",
      "Country Music Television": "CMT",
      "Comedians in Cars Getting Coffee": "CCGC",
      "Crunchy Roll": [
        "CR",
        "re:Crunchy-?Roll"
      ],
      "Crackle": "CRKL",
      "CSpan": "CSPN",
      "CTV": "CTV",
      "CuriosityStream": "CUR",
      "CWSeed": "CWS",
      "Daisuki": "DSKI",
      "DC Universe": "DCU",
      "Deadhouse Films": "DHF",
      "DramaFever": [
        "DF",
        "DramaFever"
      ],
      "Digiturk Diledigin Yerde": "DDY",
      "Discovery": [
        "DISC",
        "Discovery"
      ],
      "Disney": [
        "DSNY",
        "Disney"
      ],
      "DIY Network": "DIY",
      "Doc Club": "DOCC",
      "DPlay": "DPLY",
      "E!": "ETV",
      "ePix": "EPIX",
      "El Trece": "ETTV",
      "ESPN": "ESPN",
      "Esquire": "ESQ",
      "Family": "FAM",
      "Family Jr": "FJR",
      "Food Network": "FOOD",
      "Fox": "FOX",
      "Freeform": "FREE",
      "FYI Network": "FYI",
      "Global": "GLBL",
      "GloboSat Play": "GLOB",
      "Hallmark": "HLMK",
      "HBO Go": [
        "HBO",
        "re:HBO-?Go"
      ],
      "HGTV": "HGTV",
      "History": [
        "HIST",
        "History"
      ],
      "Hulu": "HULU",
      "Investigation Discovery": "ID",
      "IFC": "IFC",
      "iTunes": "iTunes",
      "ITV": "ITV",
      "Knowledge Network": "KNOW",
      "Lifetime": "LIFE",
      "Motor Trend OnDemand": "MTOD",
      "MBC": [
        "MBC",
        "MBCVOD"
      ],
      "MSNBC": "MNBC",
      "MTV": "MTV",
      "National Geographic": [
        "NATG",
        "re:National-?Geographic"
      ],
      "NBA TV": [
        "NBA",
        "re:NBA-?TV"
      ],
      "NBC": "NBC",
      "Netflix": [
        "NF",
        "Netflix"
      ],
      "NFL": "NFL",
      "NFL Now": "NFLN",
      "NHL GameCenter": "GC",
      "Nickelodeon": [
        "NICK",
        "Nickelodeon"
      ],
      "Norsk Rikskringkasting": "NRK",
      "OnDemandKorea": [
        "ODK",
        "OnDemandKorea"
      ],
      "PBS": "PBS",
      "PBS Kids": "PBSK",
      "Playstation Network": "PSN",
      "Pluzz": "PLUZ",
      "RTE One": "RTE",
      "SBS (AU)": "SBS",
      "SeeSo": [
        "SESO",
        "SeeSo"
      ],
      "Shomi": "SHMI",
      "Spike": "SPIK",
      "Spike TV": [
        "SPKE",
        "re:Spike-?TV"
      ],
      "Sportsnet": "SNET",
      "Sprout": "SPRT",
      "Stan": "STAN",
      "Starz": "STZ",
      "Sveriges Television": "SVT",
      "SwearNet": "SWER",
      "Syfy": "SYFY",
      "TBS": "TBS",
      "TFou": "TFOU",
      "The CW": [
        "CW",
        "re:The-?CW"
      ],
      "TLC": "TLC",
      "TubiTV": "TUBI",
      "TV3 Ireland": "TV3",
      "TV4 Sweeden": "TV4",
      "TVING": "TVING",
      "TV Land": [
        "TVL",
        "re:TV-?Land"
      ],
      "UFC": "UFC",
      "UKTV": "UKTV",
      "Univision": "UNIV",
      "USA Network": "USAN",
      "Velocity": "VLCT",
      "VH1": "VH1",
      "Viceland": "VICE",
      "Viki": "VIKI",
      "Vimeo": "VMEO",
      "VRV": "VRV",
      "W Network": "WNET",
      "WatchMe": "WME",
      "WWE Network": "WWEN",
      "Xbox Video": "XBOX",
      "Yahoo": "YHOO",
      "YouTube Red": "RED",
      "ZDF": "ZDF"
    }
  }
}
}
@ -128,7 +128,7 @@ class ConfigurationException(Exception):
    """
    Exception related to configuration file.
    """
    pass
    pass  # pylint:disable=unnecessary-pass


def load_config(options):
@ -153,7 +153,7 @@ def load_config(options):
    cwd = os.getcwd()
    yaml_supported = False
    try:
        import yaml  # pylint: disable=unused-variable
        import yaml  # pylint:disable=unused-variable,unused-import
        yaml_supported = True
    except ImportError:
        pass
@ -252,7 +252,7 @@ def load_config_file(filepath):
    try:
        import yaml
        with open(filepath) as config_file_data:
            return yaml.load(config_file_data)
            return yaml.load(config_file_data, yaml.SafeLoader)
    except ImportError:  # pragma: no cover
        raise ConfigurationException('Configuration file extension is not supported. '
                                     'PyYAML should be installed to support "%s" file' % (
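Passing `yaml.SafeLoader` explicitly closes the well-known code-execution hole in bare `yaml.load`, which can instantiate arbitrary Python objects from tags like `!!python/object`. A minimal standalone sketch of the difference (not part of this commit):

    import yaml

    document = "season_words: [season, saison]"

    # Full loader: resolves arbitrary Python object tags on untrusted input.
    data = yaml.load(document, yaml.FullLoader)

    # Safe loader: equivalent for plain config files, rejects object tags.
    safe_data = yaml.load(document, yaml.SafeLoader)  # same as yaml.safe_load(document)
    assert data == safe_data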
@ -25,7 +25,7 @@ def _potential_before(i, input_string):
    :return:
    :rtype: bool
    """
    return i - 2 >= 0 and input_string[i] in seps and input_string[i - 2] in seps and input_string[i - 1] not in seps
    return i - 1 >= 0 and input_string[i] in seps and input_string[i - 2] in seps and input_string[i - 1] not in seps


def _potential_after(i, input_string):
@ -28,7 +28,7 @@ def int_coercable(string):
        return False


def compose(*validators):
def and_(*validators):
    """
    Compose validators functions
    :param validators:
@ -49,3 +49,26 @@ def and_(*validators):
            return False
        return True
    return composed


def or_(*validators):
    """
    Compose validators functions
    :param validators:
    :type validators:
    :return:
    :rtype:
    """
    def composed(string):
        """
        Composed validators function
        :param string:
        :type string:
        :return:
        :rtype:
        """
        for validator in validators:
            if validator(string):
                return True
        return False
    return composed
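`and_` and `or_` are plain combinators: each returns a closure that short-circuits across the wrapped validators. A quick illustration of the intended semantics (the sample validators are hypothetical):

    def longer_than_two(s):
        return len(s) > 2

    def all_digits(s):
        return s.isdigit()

    check_both = and_(longer_than_two, all_digits)
    check_either = or_(longer_than_two, all_digits)

    assert check_both('123')        # both validators pass
    assert not check_both('12')     # too short
    assert check_either('12')       # all_digits alone is enough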
20  libs/common/guessit/rules/match_processors.py  Normal file
@ -0,0 +1,20 @@
"""
Match processors
"""
from guessit.rules.common import seps


def strip(match, chars=seps):
    """
    Strip given characters from match.

    :param chars:
    :param match:
    :return:
    """
    while match.input_string[match.start] in chars:
        match.start += 1
    while match.input_string[match.end - 1] in chars:
        match.end -= 1
    if not match:
        return False
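`strip` mutates the match boundaries in place and is wired in as a `pre_match_processor` by the episode rules later in this commit; returning `False` discards a match that becomes empty. A rough sketch of the effect, using a hypothetical stand-in for a rebulk match (only the fields `strip` touches):

    # Hypothetical stand-in for a rebulk Match object.
    class FakeMatch:
        def __init__(self, input_string, start, end):
            self.input_string, self.start, self.end = input_string, start, end

        def __bool__(self):
            return self.start < self.end

    m = FakeMatch('Show.12.of.24', 4, 8)   # raw slice is '.12.'
    strip(m)
    assert (m.start, m.end) == (5, 7)      # now covers just '12'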
@ -34,7 +34,9 @@ class EnlargeGroupMatches(CustomRule):
            for match in matches.ending(group.end - 1):
                ending.append(match)

        return starting, ending
        if starting or ending:
            return starting, ending
        return False

    def then(self, matches, when_response, context):
        starting, ending = when_response
@ -3,9 +3,8 @@
"""
audio_codec, audio_profile and audio_channels property
"""
from rebulk.remodule import re

from rebulk import Rebulk, Rule, RemoveMatch
from rebulk.remodule import re

from ..common import dash
from ..common.pattern import is_disabled

@ -23,7 +22,9 @@ def audio_codec(config):  # pylint:disable=unused-argument
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
    rebulk = Rebulk()\
        .regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])\
        .string_defaults(ignore_case=True)

    def audio_codec_priority(match1, match2):
        """
@ -61,7 +62,9 @@ def audio_codec(config):  # pylint:disable=unused-argument
    rebulk.string('PCM', value='PCM')
    rebulk.string('LPCM', value='LPCM')

    rebulk.defaults(name='audio_profile', disabled=lambda context: is_disabled(context, 'audio_profile'))
    rebulk.defaults(clear=True,
                    name='audio_profile',
                    disabled=lambda context: is_disabled(context, 'audio_profile'))
    rebulk.string('MA', value='Master Audio', tags=['audio_profile.rule', 'DTS-HD'])
    rebulk.string('HR', 'HRA', value='High Resolution Audio', tags=['audio_profile.rule', 'DTS-HD'])
    rebulk.string('ES', value='Extended Surround', tags=['audio_profile.rule', 'DTS'])
@ -70,17 +73,19 @@ def audio_codec(config):  # pylint:disable=unused-argument
    rebulk.string('HQ', value='High Quality', tags=['audio_profile.rule', 'Dolby Digital'])
    rebulk.string('EX', value='EX', tags=['audio_profile.rule', 'Dolby Digital'])

    rebulk.defaults(name="audio_channels", disabled=lambda context: is_disabled(context, 'audio_channels'))
    rebulk.regex(r'(7[\W_][01](?:ch)?)(?=[^\d]|$)', value='7.1', children=True)
    rebulk.regex(r'(5[\W_][01](?:ch)?)(?=[^\d]|$)', value='5.1', children=True)
    rebulk.regex(r'(2[\W_]0(?:ch)?)(?=[^\d]|$)', value='2.0', children=True)
    rebulk.defaults(clear=True,
                    name="audio_channels",
                    disabled=lambda context: is_disabled(context, 'audio_channels'))
    rebulk.regex('7[01]', value='7.1', validator=seps_after, tags='weak-audio_channels')
    rebulk.regex('5[01]', value='5.1', validator=seps_after, tags='weak-audio_channels')
    rebulk.string('20', value='2.0', validator=seps_after, tags='weak-audio_channels')
    rebulk.string('7ch', '8ch', value='7.1')
    rebulk.string('5ch', '6ch', value='5.1')
    rebulk.string('2ch', 'stereo', value='2.0')
    rebulk.string('1ch', 'mono', value='1.0')

    for value, items in config.get('audio_channels').items():
        for item in items:
            if item.startswith('re:'):
                rebulk.regex(item[3:], value=value, children=True)
            else:
                rebulk.string(item, value=value)

    rebulk.rules(DtsHDRule, DtsRule, AacRule, DolbyDigitalRule, AudioValidatorRule, HqConflictRule,
                 AudioChannelsValidatorRule)
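The new loop is what moves the channel vocabulary into `options.json`: plain entries are registered as literal strings, while entries carrying the `re:` prefix become regular expressions, matching the `audio_channels` block added to the config earlier in this commit. A standalone sketch of the same dispatch:

    import re

    config = {'2.0': ['2ch', 'stereo', 're:(2[\\W_]0(?:ch)?)(?=[^\\d]|$)']}

    patterns = []
    for value, items in config.items():
        for item in items:
            # 're:' marks a regex pattern; everything else is a literal.
            raw = item[3:] if item.startswith('re:') else re.escape(item)
            patterns.append((re.compile(raw, re.IGNORECASE), value))

    matched = [v for p, v in patterns if p.search('Movie.2.0.x264')]
    assert matched == ['2.0']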
@ -69,4 +69,6 @@ class BitRateTypeRule(Rule):
            else:
                to_rename.append(match)

        return to_rename, to_remove
        if to_rename or to_remove:
            return to_rename, to_remove
        return False

@ -26,7 +26,8 @@ def bonus(config):  # pylint:disable=unused-argument
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)

    rebulk.regex(r'x(\d+)', name='bonus', private_parent=True, children=True, formatter=int,
                 validator={'__parent__': lambda match: seps_surround},
                 validator={'__parent__': seps_surround},
                 validate_all=True,
                 conflict_solver=lambda match, conflicting: match
                 if conflicting.name in ('video_codec', 'episode') and 'weak-episode' not in conflicting.tags
                 else '__default__')

@ -44,7 +44,8 @@ def container(config):
    rebulk.regex(r'\.'+build_or_pattern(torrent)+'$', exts=torrent, tags=['extension', 'torrent'])
    rebulk.regex(r'\.'+build_or_pattern(nzb)+'$', exts=nzb, tags=['extension', 'nzb'])

    rebulk.defaults(name='container',
    rebulk.defaults(clear=True,
                    name='container',
                    validator=seps_surround,
                    formatter=lambda s: s.lower(),
                    conflict_solver=lambda match, other: match
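A note on `clear=True`, which this commit adds to several `rebulk.defaults(...)` calls: as I read the vendored rebulk 2.0.1, `defaults()` merges with whatever defaults were registered earlier on the same builder, so a later property family could silently inherit keys such as `validate_all` from an earlier call; `clear=True` resets the accumulated defaults first. A hedged sketch of the difference:

    from rebulk import Rebulk

    rebulk = Rebulk()
    rebulk.defaults(name='source', validate_all=True)

    # Without clear=True, this call would override only the keys it names
    # and keep inheriting validate_all=True from the call above.
    rebulk.defaults(clear=True, name='container')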
@ -10,6 +10,7 @@ from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, RenameMatch, POST_PROCESS
from ..common import seps, title_seps
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import or_
from ..properties.title import TitleFromPosition, TitleBaseRule
from ..properties.type import TypeProcessor

@ -133,8 +134,7 @@ class EpisodeTitleFromPosition(TitleBaseRule):

    def hole_filter(self, hole, matches):
        episode = matches.previous(hole,
                                   lambda previous: any(name in previous.names
                                                        for name in self.previous_names),
                                   lambda previous: previous.named(*self.previous_names),
                                   0)

        crc32 = matches.named('crc32')

@ -179,8 +179,7 @@ class AlternativeTitleReplace(Rule):
                                    predicate=lambda match: 'title' in match.tags, index=0)
        if main_title:
            episode = matches.previous(main_title,
                                       lambda previous: any(name in previous.names
                                                            for name in self.previous_names),
                                       lambda previous: previous.named(*self.previous_names),
                                       0)

            crc32 = matches.named('crc32')

@ -249,7 +248,7 @@ class Filepart3EpisodeTitle(Rule):

        if season:
            hole = matches.holes(subdirectory.start, subdirectory.end,
                                 ignore=lambda match: 'weak-episode' in match.tags,
                                 ignore=or_(lambda match: 'weak-episode' in match.tags, TitleBaseRule.is_ignored),
                                 formatter=cleanup, seps=title_seps, predicate=lambda match: match.value,
                                 index=0)
            if hole:

@ -292,7 +291,8 @@ class Filepart2EpisodeTitle(Rule):
        season = (matches.range(directory.start, directory.end, lambda match: match.name == 'season', 0) or
                  matches.range(filename.start, filename.end, lambda match: match.name == 'season', 0))
        if season:
            hole = matches.holes(directory.start, directory.end, ignore=lambda match: 'weak-episode' in match.tags,
            hole = matches.holes(directory.start, directory.end,
                                 ignore=or_(lambda match: 'weak-episode' in match.tags, TitleBaseRule.is_ignored),
                                 formatter=cleanup, seps=title_seps,
                                 predicate=lambda match: match.value, index=0)
            if hole:
@ -11,12 +11,13 @@ from rebulk.match import Match
from rebulk.remodule import re
from rebulk.utils import is_iterable

from guessit.rules import match_processors
from guessit.rules.common.numeral import parse_numeral, numeral
from .title import TitleFromPosition
from ..common import dash, alt_dash, seps, seps_no_fs
from ..common.formatters import strip
from ..common.numeral import numeral, parse_numeral
from ..common.pattern import is_disabled
from ..common.validators import compose, seps_surround, seps_before, int_coercable
from ..common.validators import seps_surround, int_coercable, and_
from ...reutils import build_or_pattern
@ -29,17 +30,12 @@ def episodes(config):
    :return: Created Rebulk object
    :rtype: Rebulk
    """

    # pylint: disable=too-many-branches,too-many-statements,too-many-locals
    def is_season_episode_disabled(context):
        """Whether season and episode rules should be enabled."""
        return is_disabled(context, 'episode') or is_disabled(context, 'season')

    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'])

    episode_max_range = config['episode_max_range']
    season_max_range = config['season_max_range']

    def episodes_season_chain_breaker(matches):
        """
        Break chains if there's more than 100 offset between two neighbor values.

@ -57,8 +53,6 @@ def episodes(config):
            return True
        return False

    rebulk.chain_defaults(chain_breaker=episodes_season_chain_breaker)

    def season_episode_conflict_solver(match, other):
        """
        Conflict solver for episode/season patterns

@ -76,7 +70,6 @@ def episodes(config):
        if (other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags
                and not match.initiator.children.named(match.name + 'Marker')) or (
                    other.name == 'screen_size' and not int_coercable(other.raw)):

            return match
        if other.name in ('season', 'episode') and match.initiator != other.initiator:
            if (match.initiator.name in ('weak_episode', 'weak_duplicate')

@ -87,21 +80,6 @@ def episodes(config):
                return current
        return '__default__'

    season_words = config['season_words']
    episode_words = config['episode_words']
    of_words = config['of_words']
    all_words = config['all_words']
    season_markers = config['season_markers']
    season_ep_markers = config['season_ep_markers']
    disc_markers = config['disc_markers']
    episode_markers = config['episode_markers']
    range_separators = config['range_separators']
    weak_discrete_separators = list(sep for sep in seps_no_fs if sep not in range_separators)
    strong_discrete_separators = config['discrete_separators']
    discrete_separators = strong_discrete_separators + weak_discrete_separators

    max_range_gap = config['max_range_gap']

    def ordering_validator(match):
        """
        Validator for season list. They should be in natural order to be validated.

@ -135,65 +113,18 @@ def episodes(config):
                                                     lambda m: m.name == property_name + 'Separator')
                separator = match.children.previous(current_match,
                                                    lambda m: m.name == property_name + 'Separator', 0)
                if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
                    if not 0 < current_match.value - previous_match.value <= max_range_gap + 1:
                        valid = False
                if separator.raw in strong_discrete_separators:
                    valid = True
                    break
                if separator:
                    if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
                        if not 0 < current_match.value - previous_match.value <= max_range_gap + 1:
                            valid = False
                    if separator.raw in strong_discrete_separators:
                        valid = True
                        break
                previous_match = current_match
            return valid

        return is_consecutive('episode') and is_consecutive('season')

    # S01E02, 01x02, S01S02S03
    rebulk.chain(formatter={'season': int, 'episode': int},
                 tags=['SxxExx'],
                 abbreviations=[alt_dash],
                 children=True,
                 private_parent=True,
                 validate_all=True,
                 validator={'__parent__': ordering_validator},
                 conflict_solver=season_episode_conflict_solver,
                 disabled=is_season_episode_disabled) \
        .regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)@?' +
               build_or_pattern(episode_markers + disc_markers, name='episodeMarker') + r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}).repeater('+') \
        .regex(build_or_pattern(episode_markers + disc_markers + discrete_separators + range_separators,
                                name='episodeSeparator',
                                escape=True) +
               r'(?P<episode>\d+)').repeater('*') \
        .chain() \
        .regex(r'(?P<season>\d+)@?' +
               build_or_pattern(season_ep_markers, name='episodeMarker') +
               r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .chain() \
        .regex(r'(?P<season>\d+)@?' +
               build_or_pattern(season_ep_markers, name='episodeMarker') +
               r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
                                name='episodeSeparator',
                                escape=True) +
               r'(?P<episode>\d+)').repeater('*') \
        .chain() \
        .regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .regex(build_or_pattern(season_markers + discrete_separators + range_separators,
                                name='seasonSeparator',
                                escape=True) +
               r'(?P<season>\d+)').repeater('*')

    # episode_details property
    for episode_detail in ('Special', 'Pilot', 'Unaired', 'Final'):
        rebulk.string(episode_detail, value=episode_detail, name='episode_details',
                      disabled=lambda context: is_disabled(context, 'episode_details'))

    def validate_roman(match):
        """
        Validate a roman match if surrounded by separators
@ -206,117 +137,203 @@ def episodes(config):
            return True
        return seps_surround(match)

    season_words = config['season_words']
    episode_words = config['episode_words']
    of_words = config['of_words']
    all_words = config['all_words']
    season_markers = config['season_markers']
    season_ep_markers = config['season_ep_markers']
    disc_markers = config['disc_markers']
    episode_markers = config['episode_markers']
    range_separators = config['range_separators']
    weak_discrete_separators = list(sep for sep in seps_no_fs if sep not in range_separators)
    strong_discrete_separators = config['discrete_separators']
    discrete_separators = strong_discrete_separators + weak_discrete_separators
    episode_max_range = config['episode_max_range']
    season_max_range = config['season_max_range']
    max_range_gap = config['max_range_gap']

    rebulk = Rebulk() \
        .regex_defaults(flags=re.IGNORECASE) \
        .string_defaults(ignore_case=True) \
        .chain_defaults(chain_breaker=episodes_season_chain_breaker) \
        .defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
                  formatter={'season': int, 'episode': int, 'version': int, 'count': int},
                  children=True,
                  private_parent=True,
                  conflict_solver=season_episode_conflict_solver,
                  abbreviations=[alt_dash])

    # S01E02, 01x02, S01S02S03
    rebulk.chain(
        tags=['SxxExx'],
        validate_all=True,
        validator={'__parent__': and_(seps_surround, ordering_validator)},
        disabled=is_season_episode_disabled) \
        .defaults(tags=['SxxExx']) \
        .regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)@?' +
               build_or_pattern(episode_markers + disc_markers, name='episodeMarker') + r'@?(?P<episode>\d+)')\
        .repeater('+') \
        .regex(build_or_pattern(episode_markers + disc_markers + discrete_separators + range_separators,
                                name='episodeSeparator',
                                escape=True) +
               r'(?P<episode>\d+)').repeater('*')

    rebulk.chain(tags=['SxxExx'],
                 validate_all=True,
                 validator={'__parent__': and_(seps_surround, ordering_validator)},
                 disabled=is_season_episode_disabled) \
        .defaults(tags=['SxxExx']) \
        .regex(r'(?P<season>\d+)@?' +
               build_or_pattern(season_ep_markers, name='episodeMarker') +
               r'@?(?P<episode>\d+)').repeater('+') \

    rebulk.chain(tags=['SxxExx'],
                 validate_all=True,
                 validator={'__parent__': and_(seps_surround, ordering_validator)},
                 disabled=is_season_episode_disabled) \
        .defaults(tags=['SxxExx']) \
        .regex(r'(?P<season>\d+)@?' +
               build_or_pattern(season_ep_markers, name='episodeMarker') +
               r'@?(?P<episode>\d+)') \
        .regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
                                name='episodeSeparator',
                                escape=True) +
               r'(?P<episode>\d+)').repeater('*')

    rebulk.chain(tags=['SxxExx'],
                 validate_all=True,
                 validator={'__parent__': and_(seps_surround, ordering_validator)},
                 disabled=is_season_episode_disabled) \
        .defaults(tags=['SxxExx']) \
        .regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)') \
        .regex('(?P<other>Extras)', name='other', value='Extras', tags=['no-release-group-prefix']).repeater('?') \
        .regex(build_or_pattern(season_markers + discrete_separators + range_separators,
                                name='seasonSeparator',
                                escape=True) +
               r'(?P<season>\d+)').repeater('*')

    # episode_details property
    for episode_detail in ('Special', 'Pilot', 'Unaired', 'Final'):
        rebulk.string(episode_detail,
                      private_parent=False,
                      children=False,
                      value=episode_detail,
                      name='episode_details',
                      disabled=lambda context: is_disabled(context, 'episode_details'))

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
                    validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True,
                    validate_all=True,
                    validator={'__parent__': and_(seps_surround, ordering_validator)},
                    children=True,
                    private_parent=True,
                    conflict_solver=season_episode_conflict_solver)

    rebulk.chain(abbreviations=[alt_dash],
    rebulk.chain(validate_all=True,
                 conflict_solver=season_episode_conflict_solver,
                 formatter={'season': parse_numeral, 'count': parse_numeral},
                 validator={'__parent__': compose(seps_surround, ordering_validator),
                 validator={'__parent__': and_(seps_surround, ordering_validator),
                            'season': validate_roman,
                            'count': validate_roman},
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'season')) \
        .defaults(validator=None) \
        .defaults(formatter={'season': parse_numeral, 'count': parse_numeral},
                  validator={'season': validate_roman, 'count': validate_roman},
                  conflict_solver=season_episode_conflict_solver) \
        .regex(build_or_pattern(season_words, name='seasonMarker') + '@?(?P<season>' + numeral + ')') \
        .regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
        .regex(r'@?' + build_or_pattern(range_separators + discrete_separators + ['@'],
                                        name='seasonSeparator', escape=True) +
               r'@?(?P<season>\d+)').repeater('*')

    rebulk.defaults(abbreviations=[dash])

    rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>\d+)' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',  # Episode 4
                 abbreviations=[dash], formatter={'episode': int, 'version': int, 'count': int},
                 disabled=lambda context: context.get('type') == 'episode' or is_disabled(context, 'episode'))

    rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>' + numeral + ')' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',  # Episode 4
                 abbreviations=[dash],
                 validator={'episode': validate_roman},
                 formatter={'episode': parse_numeral, 'version': int, 'count': int},
                 formatter={'episode': parse_numeral},
                 disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode'))

    rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')',
                 tags=['SxxExx'],
                 abbreviations=[dash],
                 validator=None,
                 formatter={'season': int, 'other': lambda match: 'Complete'},
                 formatter={'other': lambda match: 'Complete'},
                 disabled=lambda context: is_disabled(context, 'season'))

    # 12, 13
    rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int},
    rebulk.chain(tags=['weak-episode'],
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .defaults(validator=None, tags=['weak-episode']) \
        .regex(r'(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*')
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})', abbreviations=None).repeater('*')

    # 012, 013
    rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int},
    rebulk.chain(tags=['weak-episode'],
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .defaults(validator=None, tags=['weak-episode']) \
        .regex(r'0(?P<episode>\d{1,2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})').repeater('*')
        .regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})', abbreviations=None).repeater('*')

    # 112, 113
    rebulk.chain(tags=['weak-episode'],
                 formatter={'episode': int, 'version': int},
                 name='weak_episode',
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .defaults(validator=None, tags=['weak-episode'], name='weak_episode') \
        .regex(r'(?P<episode>\d{3,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})').repeater('*')
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})', abbreviations=None).repeater('*')

    # 1, 2, 3
    rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int},
    rebulk.chain(tags=['weak-episode'],
                 disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .defaults(validator=None, tags=['weak-episode']) \
        .regex(r'(?P<episode>\d)') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})').repeater('*')
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})', abbreviations=None).repeater('*')

    # e112, e113, 1e18, 3e19
    # TODO: Enhance rebulk for validator to be used globally (season_episode_validator)
    rebulk.chain(formatter={'season': int, 'episode': int, 'version': int},
                 disabled=lambda context: is_disabled(context, 'episode')) \
    rebulk.chain(disabled=lambda context: is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'(?P<season>\d{1,2})?(?P<episodeMarker>e)(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})', abbreviations=None).repeater('*')

    # ep 112, ep113, ep112, ep113
    rebulk.chain(abbreviations=[dash], formatter={'episode': int, 'version': int},
                 disabled=lambda context: is_disabled(context, 'episode')) \
    rebulk.chain(disabled=lambda context: is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'ep-?(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})').repeater('*')
        .regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})', abbreviations=None).repeater('*')

    # cap 112, cap 112_114
    rebulk.chain(abbreviations=[dash],
                 tags=['see-pattern'],
                 formatter={'season': int, 'episode': int},
    rebulk.chain(tags=['see-pattern'],
                 disabled=is_season_episode_disabled) \
        .defaults(validator=None) \
        .defaults(validator=None, tags=['see-pattern']) \
        .regex(r'(?P<seasonMarker>cap)-?(?P<season>\d{1,2})(?P<episode>\d{2})') \
        .regex(r'(?P<episodeSeparator>-)(?P<season>\d{1,2})(?P<episode>\d{2})').repeater('?')

    # 102, 0102
    rebulk.chain(tags=['weak-episode', 'weak-duplicate'],
                 formatter={'season': int, 'episode': int, 'version': int},
                 name='weak_duplicate',
                 conflict_solver=season_episode_conflict_solver,
                 disabled=lambda context: (context.get('episode_prefer_number', False) or
                                           context.get('type') == 'movie') or is_season_episode_disabled(context)) \
        .defaults(validator=None) \
        .defaults(tags=['weak-episode', 'weak-duplicate'],
                  name='weak_duplicate',
                  validator=None,
                  conflict_solver=season_episode_conflict_solver) \
        .regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})').repeater('*')
        .regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})', abbreviations=None).repeater('*')

    rebulk.regex(r'v(?P<version>\d+)', children=True, private_parent=True, formatter=int,
    rebulk.regex(r'v(?P<version>\d+)',
                 formatter=int,
                 disabled=lambda context: is_disabled(context, 'version'))

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])
@ -325,18 +342,23 @@ def episodes(config):
    # detached of X count (season/episode)
    rebulk.regex(r'(?P<episode>\d+)-?' + build_or_pattern(of_words) +
                 r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
                 abbreviations=[dash], children=True, private_parent=True, formatter=int,
                 formatter=int,
                 pre_match_processor=match_processors.strip,
                 disabled=lambda context: is_disabled(context, 'episode'))

    rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode",
    rebulk.regex(r'Minisodes?',
                 children=False,
                 private_parent=False,
                 name='episode_format',
                 value="Minisode",
                 disabled=lambda context: is_disabled(context, 'episode_format'))

    rebulk.rules(WeakConflictSolver, RemoveInvalidSeason, RemoveInvalidEpisode,
                 SeePatternRange(range_separators + ['_']),
                 EpisodeNumberSeparatorRange(range_separators),
                 SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx,
                 RemoveWeakDuplicate, EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator,
                 RemoveWeak, RenameToAbsoluteEpisode, CountValidator, EpisodeSingleDigitValidator, RenameToDiscMatch)
                 SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx, RemoveWeakDuplicate,
                 EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator, RemoveWeak(episode_words),
                 RenameToAbsoluteEpisode, CountValidator, EpisodeSingleDigitValidator, RenameToDiscMatch)

    return rebulk
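The refactor above mostly reshuffles shared options into `defaults()`/`chain_defaults()` and swaps the removed `compose` for `and_`; the intended parsing behaviour for ordinary names is unchanged. A quick smoke test of the kind of input these chains target (a hypothetical release name):

    from guessit import guessit

    guess = guessit('Show.Name.S01E02-03.720p.WEB-DL.x264-GRP.mkv')
    print(guess['title'], guess['season'], guess['episode'])
    # Expected along the lines of: Show Name 1 [2, 3]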
@ -416,7 +438,9 @@ class WeakConflictSolver(Rule):
                if to_append:
                    to_remove.extend(weak_dup_matches)

        return to_remove, to_append
        if to_remove or to_append:
            return to_remove, to_append
        return False


class CountValidator(Rule):
@ -442,7 +466,9 @@ class CountValidator(Rule):
                    season_count.append(count)
                else:
                    to_remove.append(count)
        return to_remove, episode_count, season_count
        if to_remove or episode_count or season_count:
            return to_remove, episode_count, season_count
        return False


class SeePatternRange(Rule):
@ -477,7 +503,9 @@ class SeePatternRange(Rule):

                to_remove.append(separator)

        return to_remove, to_append
        if to_remove or to_append:
            return to_remove, to_append
        return False


class AbstractSeparatorRange(Rule):
@ -533,7 +561,9 @@ class AbstractSeparatorRange(Rule):

            previous_match = next_match

        return to_remove, to_append
        if to_remove or to_append:
            return to_remove, to_append
        return False


class RenameToAbsoluteEpisode(Rule):
@ -629,20 +659,41 @@ class RemoveWeak(Rule):
    Remove weak-episode matches which appears after video, source, and audio matches.
    """
    priority = 16
    consequence = RemoveMatch
    consequence = RemoveMatch, AppendMatch

    def __init__(self, episode_words):
        super(RemoveWeak, self).__init__()
        self.episode_words = episode_words

    def when(self, matches, context):
        to_remove = []
        to_append = []
        for filepart in matches.markers.named('path'):
            weaks = matches.range(filepart.start, filepart.end, predicate=lambda m: 'weak-episode' in m.tags)
            if weaks:
                previous = matches.previous(weaks[0], predicate=lambda m: m.name in (
                weak = weaks[0]
                previous = matches.previous(weak, predicate=lambda m: m.name in (
                    'audio_codec', 'screen_size', 'streaming_service', 'source', 'video_profile',
                    'audio_channels', 'audio_profile'), index=0)
                if previous and not matches.holes(
                        previous.end, weaks[0].start, predicate=lambda m: m.raw.strip(seps)):
                        previous.end, weak.start, predicate=lambda m: m.raw.strip(seps)):
                    if previous.raw.lower() in self.episode_words:
                        try:
                            episode = copy.copy(weak)
                            episode.name = 'episode'
                            episode.value = int(weak.value)
                            episode.start = previous.start
                            episode.private = False
                            episode.tags = []

                            to_append.append(episode)
                        except ValueError:
                            pass

                    to_remove.extend(weaks)
        return to_remove
        if to_remove or to_append:
            return to_remove, to_append
        return False


class RemoveWeakIfSxxExx(Rule):
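`RemoveWeak` also shows the convention applied throughout this commit: `when()` now answers `False` instead of a tuple of empty lists when there is nothing to do, so the rule's consequences are skipped outright. A condensed sketch of the pattern (the class is illustrative, not from the diff):

    from rebulk import Rule, RemoveMatch

    class ExampleRule(Rule):
        # Illustrative only: mirrors the new when() convention.
        consequence = RemoveMatch

        def when(self, matches, context):
            to_remove = [m for m in matches.named('example') if m.private]
            # Old style returned to_remove unconditionally; an empty list
            # still looked like a positive answer to rebulk.
            if to_remove:
                return to_remove
            return False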
@ -856,4 +907,6 @@ class RenameToDiscMatch(Rule):
                    markers.append(marker)
                    discs.extend(sorted(marker.initiator.children.named('episode'), key=lambda m: m.value))

        return discs, markers, to_remove
        if discs or markers or to_remove:
            return discs, markers, to_remove
        return False
@ -72,6 +72,8 @@ def language(config, common_words):


UNDETERMINED = babelfish.Language('und')
MULTIPLE = babelfish.Language('mul')
NON_SPECIFIC_LANGUAGES = frozenset([UNDETERMINED, MULTIPLE])


class GuessitConverter(babelfish.LanguageReverseConverter):  # pylint: disable=missing-docstring
@ -388,7 +390,9 @@ class SubtitlePrefixLanguageRule(Rule):
            to_remove.extend(matches.conflicting(lang))
            if prefix in to_remove:
                to_remove.remove(prefix)
        return to_rename, to_remove
        if to_rename or to_remove:
            return to_rename, to_remove
        return False

    def then(self, matches, when_response, context):
        to_rename, to_remove = when_response

@ -425,7 +429,9 @@ class SubtitleSuffixLanguageRule(Rule):
            to_append.append(lang)
            if suffix in to_remove:
                to_remove.remove(suffix)
        return to_append, to_remove
        if to_append or to_remove:
            return to_append, to_remove
        return False

    def then(self, matches, when_response, context):
        to_rename, to_remove = when_response
@ -478,6 +484,7 @@ class RemoveInvalidLanguages(Rule):
    """Remove language matches that matches the blacklisted common words."""

    consequence = RemoveMatch
    priority = 32

    def __init__(self, common_words):
        """Constructor."""
@ -11,7 +11,7 @@ from rebulk.remodule import re
from ..common import dash
from ..common import seps
from ..common.pattern import is_disabled
from ..common.validators import seps_after, seps_before, seps_surround, compose
from ..common.validators import seps_after, seps_before, seps_surround, and_
from ...reutils import build_or_pattern
from ...rules.common.formatters import raw_cleanup
@ -35,11 +35,16 @@ def other(config):  # pylint:disable=unused-argument,too-many-statements
    rebulk.regex('ws', 'wide-?screen', value='Widescreen')
    rebulk.regex('Re-?Enc(?:oded)?', value='Reencoded')

    rebulk.string('Proper', 'Repack', 'Rerip', value='Proper',
    rebulk.string('Repack', 'Rerip', value='Proper',
                  tags=['streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.string('Proper', value='Proper',
                  tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix'])

    rebulk.regex('Real-Proper', 'Real-Repack', 'Real-Rerip', value='Proper',
                 tags=['streaming_service.prefix', 'streaming_service.suffix', 'real'])
    rebulk.regex('Real', value='Proper',
                 tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix', 'real'])

    rebulk.string('Fix', 'Fixed', value='Fix', tags=['has-neighbor-before', 'has-neighbor-after',
                                                     'streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.string('Dirfix', 'Nfofix', 'Prooffix', value='Fix',
@ -72,16 +77,18 @@ def other(config):  # pylint:disable=unused-argument,too-many-statements
                 private_names=['completeArticle', 'completeWordsBefore', 'completeWordsAfter'],
                 value={'other': 'Complete'},
                 tags=['release-group-prefix'],
                 validator={'__parent__': compose(seps_surround, validate_complete)})
                 validator={'__parent__': and_(seps_surround, validate_complete)})
    rebulk.string('R5', value='Region 5')
    rebulk.string('RC', value='Region C')
    rebulk.regex('Pre-?Air', value='Preair')
    rebulk.regex('(?:PS-?)?Vita', value='PS Vita')
    rebulk.regex('(?:PS-?)Vita', value='PS Vita')
    rebulk.regex('Vita', value='PS Vita', tags='has-neighbor')
    rebulk.regex('(HD)(?P<another>Rip)', value={'other': 'HD', 'another': 'Rip'},
                 private_parent=True, children=True, validator={'__parent__': seps_surround}, validate_all=True)

    for value in ('Screener', 'Remux', '3D', 'PAL', 'SECAM', 'NTSC', 'XXX'):
    for value in ('Screener', 'Remux', 'PAL', 'SECAM', 'NTSC', 'XXX'):
        rebulk.string(value, value=value)
    rebulk.string('3D', value='3D', tags='has-neighbor')

    rebulk.string('HQ', value='High Quality', tags='uhdbluray-neighbor')
    rebulk.string('HR', value='High Resolution')
@ -90,6 +97,7 @@ def other(config):  # pylint:disable=unused-argument,too-many-statements
    rebulk.string('mHD', 'HDLight', value='Micro HD')
    rebulk.string('LDTV', value='Low Definition')
    rebulk.string('HFR', value='High Frame Rate')
    rebulk.string('VFR', value='Variable Frame Rate')
    rebulk.string('HD', value='HD', validator=None,
                  tags=['streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.regex('Full-?HD', 'FHD', value='Full HD', validator=None,
@ -128,13 +136,15 @@ def other(config):  # pylint:disable=unused-argument,too-many-statements
    rebulk.regex('BT-?2020', value='BT.2020', tags='uhdbluray-neighbor')

    rebulk.string('Sample', value='Sample', tags=['at-end', 'not-a-release-group'])
    rebulk.string('Extras', value='Extras', tags='has-neighbor')
    rebulk.regex('Digital-?Extras?', value='Extras')
    rebulk.string('Proof', value='Proof', tags=['at-end', 'not-a-release-group'])
    rebulk.string('Obfuscated', 'Scrambled', value='Obfuscated', tags=['at-end', 'not-a-release-group'])
    rebulk.string('xpost', 'postbot', 'asrequested', value='Repost', tags='not-a-release-group')

    rebulk.rules(RenameAnotherToOther, ValidateHasNeighbor, ValidateHasNeighborAfter, ValidateHasNeighborBefore,
                 ValidateScreenerRule, ValidateMuxRule, ValidateHardcodedSubs, ValidateStreamingServiceNeighbor,
                 ValidateAtEnd, ProperCountRule)
                 ValidateAtEnd, ValidateReal, ProperCountRule)

    return rebulk
@ -354,3 +364,20 @@ class ValidateAtEnd(Rule):
                    to_remove.append(match)

        return to_remove


class ValidateReal(Rule):
    """
    Validate Real
    """
    consequence = RemoveMatch
    priority = 64

    def when(self, matches, context):
        ret = []
        for filepart in matches.markers.named('path'):
            for match in matches.range(filepart.start, filepart.end, lambda m: m.name == 'other' and 'real' in m.tags):
                if not matches.range(filepart.start, match.start):
                    ret.append(match)

        return ret
@ -8,7 +8,7 @@ from rebulk.remodule import re
from rebulk import Rebulk
from ..common import dash
from ..common.pattern import is_disabled
from ..common.validators import seps_surround, int_coercable, compose
from ..common.validators import seps_surround, int_coercable, and_
from ..common.numeral import numeral, parse_numeral
from ...reutils import build_or_pattern

@ -41,6 +41,6 @@ def part(config):  # pylint:disable=unused-argument

    rebulk.regex(build_or_pattern(prefixes) + r'-?(?P<part>' + numeral + r')',
                 prefixes=prefixes, validate_all=True, private_parent=True, children=True, formatter=parse_numeral,
                 validator={'part': compose(validate_roman, lambda m: 0 < m.value < 100)})
                 validator={'part': and_(validate_roman, lambda m: 0 < m.value < 100)})

    return rebulk
@ -9,8 +9,8 @@ from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch
from rebulk.match import Match

from ..common import seps
from ..common.expected import build_expected_function
from ..common.comparators import marker_sorted
from ..common.expected import build_expected_function
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import int_coercable, seps_surround

@ -50,7 +50,7 @@ def release_group(config):
        if string.lower().endswith(forbidden) and string[-len(forbidden) - 1:-len(forbidden)] in seps:
            string = string[:len(forbidden)]
            string = string.strip(groupname_seps)
        return string
        return string.strip()

    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'release_group'))
@ -72,7 +72,9 @@ _scene_previous_names = ('video_codec', 'source', 'video_api', 'audio_codec', 'audio_profile',
                         'audio_channels', 'screen_size', 'other', 'container', 'language', 'subtitle_language',
                         'subtitle_language.suffix', 'subtitle_language.prefix', 'language.suffix')

_scene_previous_tags = ('release-group-prefix', )
_scene_previous_tags = ('release-group-prefix',)

_scene_no_previous_tags = ('no-release-group-prefix',)


class DashSeparatedReleaseGroup(Rule):
@ -193,7 +195,8 @@ class DashSeparatedReleaseGroup(Rule):

        if releasegroup.value:
            to_append.append(releasegroup)
        return to_remove, to_append
        if to_remove or to_append:
            return to_remove, to_append


class SceneReleaseGroup(Rule):
@ -212,6 +215,17 @@ class SceneReleaseGroup(Rule):
        super(SceneReleaseGroup, self).__init__()
        self.value_formatter = value_formatter

    @staticmethod
    def is_previous_match(match):
        """
        Check if match can precede release_group

        :param match:
        :return:
        """
        return not match.tagged(*_scene_no_previous_tags) if match.name in _scene_previous_names else \
            match.tagged(*_scene_previous_tags)

    def when(self, matches, context):  # pylint:disable=too-many-locals
        # If a release_group is found before, ignore this kind of release_group rule.
@ -253,13 +267,12 @@ class SceneReleaseGroup(Rule):

            if match.start < filepart.start:
                return False
            return not match.private or match.name in _scene_previous_names
            return not match.private or self.is_previous_match(match)

        previous_match = matches.previous(last_hole,
                                          previous_match_filter,
                                          index=0)
        if previous_match and (previous_match.name in _scene_previous_names or
                               any(tag in previous_match.tags for tag in _scene_previous_tags)) and \
        if previous_match and (self.is_previous_match(previous_match)) and \
                not matches.input_string[previous_match.end:last_hole.start].strip(seps) \
                and not int_coercable(last_hole.value.strip(seps)):
@ -300,11 +313,11 @@ class AnimeReleaseGroup(Rule):

        # If a release_group is found before, ignore this kind of release_group rule.
        if matches.named('release_group'):
            return to_remove, to_append
            return False

        if not matches.named('episode') and not matches.named('season') and matches.named('release_group'):
            # This doesn't seems to be an anime, and we already found another release_group.
            return to_remove, to_append
            return False

        for filepart in marker_sorted(matches.markers.named('path'), matches):

@ -328,4 +341,7 @@ class AnimeReleaseGroup(Rule):
                    to_append.append(group)
                    to_remove.extend(matches.range(empty_group.start, empty_group.end,
                                                   lambda m: 'weak-language' in m.tags))
        return to_remove, to_append

        if to_remove or to_append:
            return to_remove, to_append
        return False
@ -24,8 +24,8 @@ def screen_size(config):
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    interlaced = frozenset({res for res in config['interlaced']})
    progressive = frozenset({res for res in config['progressive']})
    interlaced = frozenset(config['interlaced'])
    progressive = frozenset(config['progressive'])
    frame_rates = [re.escape(rate) for rate in config['frame_rates']]
    min_ar = config['min_ar']
    max_ar = config['max_ar']
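The dropped comprehension was a no-op: `frozenset` accepts any iterable directly, so the set comprehension only built a throwaway intermediate set.

    resolutions = ['480', '576', '1080']
    assert frozenset({res for res in resolutions}) == frozenset(resolutions)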
@ -12,7 +12,7 @@ from rebulk import AppendMatch, Rebulk, RemoveMatch, Rule
from .audio_codec import HqConflictRule
from ..common import dash, seps
from ..common.pattern import is_disabled
from ..common.validators import seps_before, seps_after
from ..common.validators import seps_before, seps_after, or_


def source(config):  # pylint:disable=unused-argument
@ -26,7 +26,10 @@ def source(config):  # pylint:disable=unused-argument
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'source'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash], private_parent=True, children=True)
    rebulk.defaults(name='source', tags=['video-codec-prefix', 'streaming_service.suffix'])
    rebulk = rebulk.defaults(name='source',
                             tags=['video-codec-prefix', 'streaming_service.suffix'],
                             validate_all=True,
                             validator={'__parent__': or_(seps_before, seps_after)})

    rip_prefix = '(?P<other>Rip)-?'
    rip_suffix = '-?(?P<other>Rip)'
@ -42,7 +45,7 @@ def source(config):  # pylint:disable=unused-argument

    def demote_other(match, other):  # pylint: disable=unused-argument
        """Default conflict solver with 'other' property."""
        return other if other.name == 'other' else '__default__'
        return other if other.name == 'other' or other.name == 'release_group' else '__default__'

    rebulk.regex(*build_source_pattern('VHS', suffix=rip_optional_suffix),
                 value={'source': 'VHS', 'other': 'Rip'})
@ -92,8 +95,9 @@ def source(config):  # pylint:disable=unused-argument
    # WEBCap is a synonym to WEBRip, mostly used by non english
    rebulk.regex(*build_source_pattern('WEB-?(?P<another>Cap)', suffix=rip_optional_suffix),
                 value={'source': 'Web', 'other': 'Rip', 'another': 'Rip'})
    rebulk.regex(*build_source_pattern('WEB-?DL', 'WEB-?U?HD', 'WEB', 'DL-?WEB', 'DL(?=-?Mux)'),
    rebulk.regex(*build_source_pattern('WEB-?DL', 'WEB-?U?HD', 'DL-?WEB', 'DL(?=-?Mux)'),
                 value={'source': 'Web'})
    rebulk.regex('(WEB)', value='Web', tags='weak.source')

    rebulk.regex(*build_source_pattern('HD-?DVD', suffix=rip_optional_suffix),
                 value={'source': 'HD-DVD', 'other': 'Rip'})
@ -118,7 +122,7 @@ def source(config):  # pylint:disable=unused-argument
    rebulk.regex(*build_source_pattern('DSR?', 'SAT', suffix=rip_suffix),
                 value={'source': 'Satellite', 'other': 'Rip'})

    rebulk.rules(ValidateSource, UltraHdBlurayRule)
    rebulk.rules(ValidateSourcePrefixSuffix, ValidateWeakSource, UltraHdBlurayRule)

    return rebulk
@ -170,32 +174,62 @@ class UltraHdBlurayRule(Rule):
|
|||
to_remove.append(match)
|
||||
to_append.append(new_source)
|
||||
|
||||
return to_remove, to_append
|
||||
if to_remove or to_append:
|
||||
return to_remove, to_append
|
||||
return False
|
||||
|
||||
|
||||
class ValidateSource(Rule):
|
||||
class ValidateSourcePrefixSuffix(Rule):
|
||||
"""
|
||||
Validate source with screener property, with video_codec property or separated
|
||||
Validate source with source prefix, source suffix.
|
||||
"""
|
||||
priority = 64
|
||||
consequence = RemoveMatch
|
||||
|
||||
def when(self, matches, context):
|
||||
ret = []
|
||||
for match in matches.named('source'):
|
||||
match = match.initiator
|
||||
if not seps_before(match) and \
|
||||
not matches.range(match.start - 1, match.start - 2,
|
||||
lambda m: 'source-prefix' in m.tags):
|
||||
if match.children:
|
||||
ret.extend(match.children)
|
||||
ret.append(match)
|
||||
continue
|
||||
if not seps_after(match) and \
|
||||
not matches.range(match.end, match.end + 1,
|
||||
lambda m: 'source-suffix' in m.tags):
|
||||
if match.children:
|
||||
ret.extend(match.children)
|
||||
ret.append(match)
|
||||
continue
|
||||
for filepart in matches.markers.named('path'):
|
||||
for match in matches.range(filepart.start, filepart.end, predicate=lambda m: m.name == 'source'):
|
||||
match = match.initiator
|
||||
if not seps_before(match) and \
|
||||
not matches.range(match.start - 1, match.start - 2,
|
||||
lambda m: 'source-prefix' in m.tags):
|
||||
if match.children:
|
||||
ret.extend(match.children)
|
||||
ret.append(match)
|
||||
continue
|
||||
if not seps_after(match) and \
|
||||
not matches.range(match.end, match.end + 1,
|
||||
lambda m: 'source-suffix' in m.tags):
|
||||
if match.children:
|
||||
ret.extend(match.children)
|
||||
ret.append(match)
|
||||
continue
|
||||
|
||||
return ret
|
||||
|
||||
|
||||
class ValidateWeakSource(Rule):
|
||||
"""
|
||||
Validate weak source
|
||||
"""
|
||||
dependency = [ValidateSourcePrefixSuffix]
|
||||
priority = 64
|
||||
consequence = RemoveMatch
|
||||
|
||||
def when(self, matches, context):
|
||||
ret = []
|
||||
for filepart in matches.markers.named('path'):
|
||||
for match in matches.range(filepart.start, filepart.end, predicate=lambda m: m.name == 'source'):
|
||||
# if there are more than 1 source in this filepart, just before the year and with holes for the title
|
||||
# most likely the source is part of the title
|
||||
if 'weak.source' in match.tags \
|
||||
and matches.range(match.end, filepart.end, predicate=lambda m: m.name == 'source') \
|
||||
and matches.holes(filepart.start, match.start,
|
||||
predicate=lambda m: m.value.strip(seps), index=-1):
|
||||
if match.children:
|
||||
ret.extend(match.children)
|
||||
ret.append(match)
|
||||
continue
|
||||
|
||||
return ret
|
||||
|
|
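The hunks above retag a bare 'WEB' token as a weak source ('weak.source') and add ValidateWeakSource, which discards it only when another source follows in the same file part and title text sits in front of it. The new test data later in this diff pins the common case down, and it can be reproduced directly; the assertions mirror the Show.S01E01.WEB.x264-METCON.mkv entry:

    from guessit import guessit

    # a lone 'WEB' with no second source after it survives as the source
    info = guessit('Show.S01E01.WEB.x264-METCON.mkv')
    assert info['source'] == 'Web'
    assert info['title'] == 'Show'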
|
@ -25,133 +25,13 @@ def streaming_service(config): # pylint: disable=too-many-statements,unused-arg
|
|||
rebulk = rebulk.string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
|
||||
rebulk.defaults(name='streaming_service', tags=['source-prefix'])
|
||||
|
||||
rebulk.string('AE', 'A&E', value='A&E')
|
||||
rebulk.string('AMBC', value='ABC')
|
||||
rebulk.string('AUBC', value='ABC Australia')
|
||||
rebulk.string('AJAZ', value='Al Jazeera English')
|
||||
rebulk.string('AMC', value='AMC')
|
||||
rebulk.string('AMZN', 'Amazon', value='Amazon Prime')
|
||||
rebulk.regex('Amazon-?Prime', value='Amazon Prime')
|
||||
rebulk.string('AS', value='Adult Swim')
|
||||
rebulk.regex('Adult-?Swim', value='Adult Swim')
|
||||
rebulk.string('ATK', value="America's Test Kitchen")
|
||||
rebulk.string('ANPL', value='Animal Planet')
|
||||
rebulk.string('ANLB', value='AnimeLab')
|
||||
rebulk.string('AOL', value='AOL')
|
||||
rebulk.string('ARD', value='ARD')
|
||||
rebulk.string('iP', value='BBC iPlayer')
|
||||
rebulk.regex('BBC-?iPlayer', value='BBC iPlayer')
|
||||
rebulk.string('BRAV', value='BravoTV')
|
||||
rebulk.string('CNLP', value='Canal+')
|
||||
rebulk.string('CN', value='Cartoon Network')
|
||||
rebulk.string('CBC', value='CBC')
|
||||
rebulk.string('CBS', value='CBS')
|
||||
rebulk.string('CNBC', value='CNBC')
|
||||
rebulk.string('CC', value='Comedy Central')
|
||||
rebulk.string('4OD', value='Channel 4')
|
||||
rebulk.string('CHGD', value='CHRGD')
|
||||
rebulk.string('CMAX', value='Cinemax')
|
||||
rebulk.string('CMT', value='Country Music Television')
|
||||
rebulk.regex('Comedy-?Central', value='Comedy Central')
|
||||
rebulk.string('CCGC', value='Comedians in Cars Getting Coffee')
|
||||
rebulk.string('CR', value='Crunchy Roll')
|
||||
rebulk.string('CRKL', value='Crackle')
|
||||
rebulk.regex('Crunchy-?Roll', value='Crunchy Roll')
|
||||
rebulk.string('CSPN', value='CSpan')
|
||||
rebulk.string('CTV', value='CTV')
|
||||
rebulk.string('CUR', value='CuriosityStream')
|
||||
rebulk.string('CWS', value='CWSeed')
|
||||
rebulk.string('DSKI', value='Daisuki')
|
||||
rebulk.string('DHF', value='Deadhouse Films')
|
||||
rebulk.string('DDY', value='Digiturk Diledigin Yerde')
|
||||
rebulk.string('DISC', 'Discovery', value='Discovery')
|
||||
rebulk.string('DSNY', 'Disney', value='Disney')
|
||||
rebulk.string('DIY', value='DIY Network')
|
||||
rebulk.string('DOCC', value='Doc Club')
|
||||
rebulk.string('DPLY', value='DPlay')
|
||||
rebulk.string('ETV', value='E!')
|
||||
rebulk.string('EPIX', value='ePix')
|
||||
rebulk.string('ETTV', value='El Trece')
|
||||
rebulk.string('ESPN', value='ESPN')
|
||||
rebulk.string('ESQ', value='Esquire')
|
||||
rebulk.string('FAM', value='Family')
|
||||
rebulk.string('FJR', value='Family Jr')
|
||||
rebulk.string('FOOD', value='Food Network')
|
||||
rebulk.string('FOX', value='Fox')
|
||||
rebulk.string('FREE', value='Freeform')
|
||||
rebulk.string('FYI', value='FYI Network')
|
||||
rebulk.string('GLBL', value='Global')
|
||||
rebulk.string('GLOB', value='GloboSat Play')
|
||||
rebulk.string('HLMK', value='Hallmark')
|
||||
rebulk.string('HBO', value='HBO Go')
|
||||
rebulk.regex('HBO-?Go', value='HBO Go')
|
||||
rebulk.string('HGTV', value='HGTV')
|
||||
rebulk.string('HIST', 'History', value='History')
|
||||
rebulk.string('HULU', value='Hulu')
|
||||
rebulk.string('ID', value='Investigation Discovery')
|
||||
rebulk.string('IFC', value='IFC')
|
||||
rebulk.string('iTunes', 'iT', value='iTunes')
|
||||
rebulk.string('ITV', value='ITV')
|
||||
rebulk.string('KNOW', value='Knowledge Network')
|
||||
rebulk.string('LIFE', value='Lifetime')
|
||||
rebulk.string('MTOD', value='Motor Trend OnDemand')
|
||||
rebulk.string('MNBC', value='MSNBC')
|
||||
rebulk.string('MTV', value='MTV')
|
||||
rebulk.string('NATG', value='National Geographic')
|
||||
rebulk.regex('National-?Geographic', value='National Geographic')
|
||||
rebulk.string('NBA', value='NBA TV')
|
||||
rebulk.regex('NBA-?TV', value='NBA TV')
|
||||
rebulk.string('NBC', value='NBC')
|
||||
rebulk.string('NF', 'Netflix', value='Netflix')
|
||||
rebulk.string('NFL', value='NFL')
|
||||
rebulk.string('NFLN', value='NFL Now')
|
||||
rebulk.string('GC', value='NHL GameCenter')
|
||||
rebulk.string('NICK', 'Nickelodeon', value='Nickelodeon')
|
||||
rebulk.string('NRK', value='Norsk Rikskringkasting')
|
||||
rebulk.string('PBS', value='PBS')
|
||||
rebulk.string('PBSK', value='PBS Kids')
|
||||
rebulk.string('PSN', value='Playstation Network')
|
||||
rebulk.string('PLUZ', value='Pluzz')
|
||||
rebulk.string('RTE', value='RTE One')
|
||||
rebulk.string('SBS', value='SBS (AU)')
|
||||
rebulk.string('SESO', 'SeeSo', value='SeeSo')
|
||||
rebulk.string('SHMI', value='Shomi')
|
||||
rebulk.string('SPIK', value='Spike')
|
||||
rebulk.string('SPKE', value='Spike TV')
|
||||
rebulk.regex('Spike-?TV', value='Spike TV')
|
||||
rebulk.string('SNET', value='Sportsnet')
|
||||
rebulk.string('SPRT', value='Sprout')
|
||||
rebulk.string('STAN', value='Stan')
|
||||
rebulk.string('STZ', value='Starz')
|
||||
rebulk.string('SVT', value='Sveriges Television')
|
||||
rebulk.string('SWER', value='SwearNet')
|
||||
rebulk.string('SYFY', value='Syfy')
|
||||
rebulk.string('TBS', value='TBS')
|
||||
rebulk.string('TFOU', value='TFou')
|
||||
rebulk.string('CW', value='The CW')
|
||||
rebulk.regex('The-?CW', value='The CW')
|
||||
rebulk.string('TLC', value='TLC')
|
||||
rebulk.string('TUBI', value='TubiTV')
|
||||
rebulk.string('TV3', value='TV3 Ireland')
|
||||
rebulk.string('TV4', value='TV4 Sweeden')
|
||||
rebulk.string('TVL', value='TV Land')
|
||||
rebulk.regex('TV-?Land', value='TV Land')
|
||||
rebulk.string('UFC', value='UFC')
|
||||
rebulk.string('UKTV', value='UKTV')
|
||||
rebulk.string('UNIV', value='Univision')
|
||||
rebulk.string('USAN', value='USA Network')
|
||||
rebulk.string('VLCT', value='Velocity')
|
||||
rebulk.string('VH1', value='VH1')
|
||||
rebulk.string('VICE', value='Viceland')
|
||||
rebulk.string('VMEO', value='Vimeo')
|
||||
rebulk.string('VRV', value='VRV')
|
||||
rebulk.string('WNET', value='W Network')
|
||||
rebulk.string('WME', value='WatchMe')
|
||||
rebulk.string('WWEN', value='WWE Network')
|
||||
rebulk.string('XBOX', value='Xbox Video')
|
||||
rebulk.string('YHOO', value='Yahoo')
|
||||
rebulk.string('RED', value='YouTube Red')
|
||||
rebulk.string('ZDF', value='ZDF')
|
||||
for value, items in config.items():
|
||||
patterns = items if isinstance(items, list) else [items]
|
||||
for pattern in patterns:
|
||||
if pattern.startswith('re:'):
|
||||
rebulk.regex(pattern, value=value)
|
||||
else:
|
||||
rebulk.string(pattern, value=value)
|
||||
|
||||
rebulk.rules(ValidateStreamingService)
|
||||
|
||||
|
@ -161,7 +41,7 @@ def streaming_service(config): # pylint: disable=too-many-statements,unused-arg
|
|||
class ValidateStreamingService(Rule):
|
||||
"""Validate streaming service matches."""
|
||||
|
||||
priority = 32
|
||||
priority = 128
|
||||
consequence = RemoveMatch
|
||||
|
||||
def when(self, matches, context):
|
||||
|
|
|
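The large removal above (-133/+13) swaps the hard-coded streaming-service table for entries read from guessit's configuration: each service name maps to one pattern or a list of patterns, a plain string is registered case-insensitively via rebulk.string (see the string_defaults call), and a 're:' prefix marks a regular expression for rebulk.regex. A sketch of the mapping shape the loop consumes, reusing two entries from the removed table; the dict literal itself is illustrative, not the shipped config file:

    # illustrative mapping; the real values now live in guessit's config
    config = {
        'Amazon Prime': ['AMZN', 'Amazon', 're:Amazon-?Prime'],
        'BBC iPlayer': ['iP', 're:BBC-?iPlayer'],
    }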
@ -8,7 +8,12 @@ from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, AppendTags
|
|||
from rebulk.formatters import formatters
|
||||
|
||||
from .film import FilmTitleRule
|
||||
from .language import SubtitlePrefixLanguageRule, SubtitleSuffixLanguageRule, SubtitleExtensionRule
|
||||
from .language import (
|
||||
SubtitlePrefixLanguageRule,
|
||||
SubtitleSuffixLanguageRule,
|
||||
SubtitleExtensionRule,
|
||||
NON_SPECIFIC_LANGUAGES
|
||||
)
|
||||
from ..common import seps, title_seps
|
||||
from ..common.comparators import marker_sorted
|
||||
from ..common.expected import build_expected_function
|
||||
|
@ -88,12 +93,19 @@ class TitleBaseRule(Rule):
|
|||
:rtype:
|
||||
"""
|
||||
cropped_holes = []
|
||||
group_markers = matches.markers.named('group')
|
||||
for group_marker in group_markers:
|
||||
path_marker = matches.markers.at_match(group_marker, predicate=lambda m: m.name == 'path', index=0)
|
||||
if path_marker and path_marker.span == group_marker.span:
|
||||
group_markers.remove(group_marker)
|
||||
|
||||
for hole in holes:
|
||||
group_markers = matches.markers.named('group')
|
||||
cropped_holes.extend(hole.crop(group_markers))
|
||||
|
||||
return cropped_holes
|
||||
|
||||
def is_ignored(self, match):
|
||||
@staticmethod
|
||||
def is_ignored(match):
|
||||
"""
|
||||
Ignore matches when scanning for title (hole).
|
||||
|
||||
|
@ -130,7 +142,8 @@ class TitleBaseRule(Rule):
|
|||
for outside in outside_matches:
|
||||
other_languages.extend(matches.range(outside.start, outside.end,
|
||||
lambda c_match: c_match.name == match.name and
|
||||
c_match not in to_keep))
|
||||
c_match not in to_keep and
|
||||
c_match.value not in NON_SPECIFIC_LANGUAGES))
|
||||
|
||||
if not other_languages and (not starting or len(match.raw) <= 3):
|
||||
return True
|
||||
|
@ -239,7 +252,7 @@ class TitleBaseRule(Rule):
|
|||
to_remove = []
|
||||
|
||||
if matches.named(self.match_name, lambda match: 'expected' in match.tags):
|
||||
return ret, to_remove
|
||||
return False
|
||||
|
||||
fileparts = [filepart for filepart in list(marker_sorted(matches.markers.named('path'), matches))
|
||||
if not self.filepart_filter or self.filepart_filter(filepart, matches)]
|
||||
|
@ -272,7 +285,9 @@ class TitleBaseRule(Rule):
|
|||
ret.extend(titles)
|
||||
to_remove.extend(to_remove_c)
|
||||
|
||||
return ret, to_remove
|
||||
if ret or to_remove:
|
||||
return ret, to_remove
|
||||
return False
|
||||
|
||||
|
||||
class TitleFromPosition(TitleBaseRule):
|
||||
|
@ -329,4 +344,6 @@ class PreferTitleWithYear(Rule):
|
|||
for title_match in titles:
|
||||
if title_match.value not in title_values:
|
||||
to_remove.append(title_match)
|
||||
return to_remove, to_tag
|
||||
if to_remove or to_tag:
|
||||
return to_remove, to_tag
|
||||
return False
|
||||
|
|
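One recurring change across the title.py hunks (and UltraHdBlurayRule earlier): Rule.when methods that used to return empty containers now return False when there is nothing to do, making the no-trigger case explicit to rebulk's rule engine, which treats any falsy result as "rule did not fire". A minimal sketch of the shape, with an invented rule and property name:

    from rebulk import Rule, RemoveMatch

    class DropChildlessExample(Rule):  # hypothetical rule, shows the shape only
        consequence = RemoveMatch

        def when(self, matches, context):
            to_remove = [m for m in matches.named('example') if not m.children]
            if to_remove:
                return to_remove
            return False  # explicit False instead of an empty list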
|
@ -3,9 +3,8 @@
|
|||
"""
|
||||
video_codec and video_profile property
|
||||
"""
|
||||
from rebulk.remodule import re
|
||||
|
||||
from rebulk import Rebulk, Rule, RemoveMatch
|
||||
from rebulk.remodule import re
|
||||
|
||||
from ..common import dash
|
||||
from ..common.pattern import is_disabled
|
||||
|
@ -43,7 +42,8 @@ def video_codec(config): # pylint:disable=unused-argument
|
|||
|
||||
# http://blog.mediacoderhq.com/h264-profiles-and-levels/
|
||||
# https://en.wikipedia.org/wiki/H.264/MPEG-4_AVC
|
||||
rebulk.defaults(name="video_profile",
|
||||
rebulk.defaults(clear=True,
|
||||
name="video_profile",
|
||||
validator=seps_surround,
|
||||
disabled=lambda context: is_disabled(context, 'video_profile'))
|
||||
|
||||
|
@ -66,7 +66,8 @@ def video_codec(config): # pylint:disable=unused-argument
|
|||
rebulk.string('DXVA', value='DXVA', name='video_api',
|
||||
disabled=lambda context: is_disabled(context, 'video_api'))
|
||||
|
||||
rebulk.defaults(name='color_depth',
|
||||
rebulk.defaults(clear=True,
|
||||
name='color_depth',
|
||||
validator=seps_surround,
|
||||
disabled=lambda context: is_disabled(context, 'color_depth'))
|
||||
rebulk.regex('12.?bits?', value='12-bit')
|
||||
|
|
|
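The clear=True arguments added above are easy to miss but significant: as I read the new builder code later in this diff (set_defaults(kwargs, self._defaults, override=True)), rebulk's defaults() otherwise merges new keyword defaults into the previously registered ones, so the video_profile and color_depth sections could inherit stale keys from the earlier video_codec defaults. A sketch of the difference under that reading:

    rebulk.defaults(name='video_codec')
    # ... video_codec patterns ...
    # without clear=True, this call would merge with the previous defaults
    # and keep any keys it does not override; clear=True starts fresh:
    rebulk.defaults(clear=True, name='video_profile', validator=seps_surround)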
@ -67,7 +67,7 @@ def website(config):
|
|||
"""
|
||||
Validator for next website matches
|
||||
"""
|
||||
return any(name in ['season', 'episode', 'year'] for name in match.names)
|
||||
return match.named('season', 'episode', 'year')
|
||||
|
||||
def when(self, matches, context):
|
||||
to_remove = []
|
||||
|
@ -80,7 +80,9 @@ def website(config):
|
|||
if not safe:
|
||||
suffix = matches.next(website_match, PreferTitleOverWebsite.valid_followers, 0)
|
||||
if suffix:
|
||||
to_remove.append(website_match)
|
||||
group = matches.markers.at_match(website_match, lambda marker: marker.name == 'group', 0)
|
||||
if not group:
|
||||
to_remove.append(website_match)
|
||||
return to_remove
|
||||
|
||||
rebulk.rules(PreferTitleOverWebsite, ValidateWebsitePrefix)
|
||||
|
|
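The reworked guard above no longer drops every website match followed by a title-like token: it now consults matches.markers.at_match and keeps the match when it sits inside a group marker (bracketed text). The new Westworld case later in this diff exercises exactly that; the filename below is its second path component:

    from guessit import guessit

    # the bracketed prefix is a group marker, so the website match survives
    info = guessit('[TorrentCouch.com].Westworld.S02E03.720p.WEB-DL.x264.mp4')
    assert info['website'] == 'TorrentCouch.com'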
|
@ -35,9 +35,9 @@
|
|||
-cd: 1
|
||||
-cd_count: 3
|
||||
|
||||
? This.Is.Us
|
||||
? This.is.Us
|
||||
: options: --exclude country
|
||||
title: This Is Us
|
||||
title: This is Us
|
||||
-country: US
|
||||
|
||||
? 2015.01.31
|
||||
|
@ -286,9 +286,9 @@
|
|||
: options: --exclude website
|
||||
-website: wawa.co.uk
|
||||
|
||||
? movie.mkv
|
||||
? movie.mp4
|
||||
: options: --exclude mimetype
|
||||
-mimetype: video/x-matroska
|
||||
-mimetype: video/mp4
|
||||
|
||||
? another movie.mkv
|
||||
: options: --exclude container
|
||||
|
|
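In this test grammar a '?' line is the input name, ':' introduces the expected properties, 'options:' passes CLI-style flags, and a leading '-' marks a property/value the parse must not produce. The same cases run interactively, since guessit accepts the option string as its second argument; for the updated This.is.Us entry:

    from guessit import guessit

    # with country detection excluded, 'Us' stays part of the title
    info = guessit('This.is.Us', '--exclude country')
    assert info['title'] == 'This is Us'
    assert 'country' not in info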
|
@ -201,9 +201,9 @@
|
|||
? Series/My Name Is Earl/My.Name.Is.Earl.S01Extras.-.Bad.Karma.DVDRip.XviD.avi
|
||||
: title: My Name Is Earl
|
||||
season: 1
|
||||
episode_title: Extras - Bad Karma
|
||||
episode_title: Bad Karma
|
||||
source: DVD
|
||||
other: Rip
|
||||
other: [Extras, Rip]
|
||||
video_codec: Xvid
|
||||
|
||||
? series/Freaks And Geeks/Season 1/Episode 4 - Kim Kelly Is My Friend-eng(1).srt
|
||||
|
@ -1917,9 +1917,11 @@
|
|||
|
||||
? Duck.Dynasty.S02E07.Streik.German.DOKU.DL.WS.DVDRiP.x264-CDP
|
||||
: episode: 7
|
||||
episode_title: Streik German
|
||||
episode_title: Streik
|
||||
source: DVD
|
||||
language: mul
|
||||
language:
|
||||
- German
|
||||
- Multi
|
||||
other: [Documentary, Widescreen, Rip]
|
||||
release_group: CDP
|
||||
season: 2
|
||||
|
@ -1930,9 +1932,11 @@
|
|||
? Family.Guy.S13E14.JOLO.German.AC3D.DL.720p.WebHD.x264-CDD
|
||||
: audio_codec: Dolby Digital
|
||||
episode: 14
|
||||
episode_title: JOLO German
|
||||
episode_title: JOLO
|
||||
source: Web
|
||||
language: mul
|
||||
language:
|
||||
- German
|
||||
- Multi
|
||||
release_group: CDD
|
||||
screen_size: 720p
|
||||
season: 13
|
||||
|
@ -3025,7 +3029,7 @@
|
|||
title: Show Name
|
||||
episode: [493, 494, 495, 496, 497, 498, 500, 501, 502, 503, 504, 505, 506, 507]
|
||||
screen_size: 720p
|
||||
subtitle_language: fr
|
||||
other: Variable Frame Rate
|
||||
video_codec: H.264
|
||||
audio_codec: AAC
|
||||
type: episode
|
||||
|
@ -4524,4 +4528,166 @@
|
|||
video_codec: H.264
|
||||
audio_codec: MP2
|
||||
release_group: KIDKAT
|
||||
type: episode
|
||||
|
||||
? Por Trece Razones - Temporada 2 [HDTV 720p][Cap.201][AC3 5.1 Castellano]/Por Trece Razones 2x01 [des202].mkv
|
||||
: title: Por Trece Razones
|
||||
season: 2
|
||||
source: HDTV
|
||||
screen_size: 720p
|
||||
episode: 1
|
||||
audio_codec: Dolby Digital
|
||||
audio_channels: '5.1'
|
||||
language: Catalan
|
||||
release_group: des202
|
||||
container: mkv
|
||||
type: episode
|
||||
|
||||
? Cuerpo de Elite - Temporada 1 [HDTV 720p][Cap.113][AC3 5.1 Esp Castellano]\CuerpoDeElite720p_113_desca202.mkv
|
||||
: title: Cuerpo de Elite
|
||||
season: 1
|
||||
source: HDTV
|
||||
screen_size: 720p
|
||||
episode: 13
|
||||
audio_codec: Dolby Digital
|
||||
audio_channels: '5.1'
|
||||
language:
|
||||
- Spanish
|
||||
- Catalan
|
||||
container: mkv
|
||||
type: episode
|
||||
|
||||
? Show.Name.S01E01.St.Patricks.Day.1080p.mkv
|
||||
: title: Show Name
|
||||
season: 1
|
||||
episode: 1
|
||||
episode_title: St Patricks Day
|
||||
screen_size: 1080p
|
||||
container: mkv
|
||||
type: episode
|
||||
|
||||
? Show.Name.S01E01.St.Patricks.Day.1080p-grp.mkv
|
||||
: title: Show Name
|
||||
season: 1
|
||||
episode: 1
|
||||
episode_title: St Patricks Day
|
||||
screen_size: 1080p
|
||||
release_group: grp
|
||||
container: mkv
|
||||
type: episode
|
||||
|
||||
? Titans.2018.S01E09.Hank.And.Dawn.720p.DCU.WEB-DL.AAC2.0.H264-NTb
|
||||
: title: Titans
|
||||
year: 2018
|
||||
season: 1
|
||||
episode: 9
|
||||
episode_title: Hank And Dawn
|
||||
screen_size: 720p
|
||||
streaming_service: DC Universe
|
||||
source: Web
|
||||
audio_codec: AAC
|
||||
audio_channels: '2.0'
|
||||
video_codec: H.264
|
||||
release_group: NTb
|
||||
type: episode
|
||||
|
||||
? S.W.A.T.2017.S01E21.Treibjagd.German.Dubbed.DL.AmazonHD.x264-TVS
|
||||
: title: S.W.A.T.
|
||||
year: 2017
|
||||
season: 1
|
||||
episode: 21
|
||||
episode_title: Treibjagd
|
||||
language:
|
||||
- German
|
||||
- Multi
|
||||
streaming_service: Amazon Prime
|
||||
other: HD
|
||||
video_codec: H.264
|
||||
release_group: TVS
|
||||
type: episode
|
||||
|
||||
? S.W.A.T.2017.S01E16.READNFO.720p.HDTV.x264-KILLERS
|
||||
: title: S.W.A.T.
|
||||
year: 2017
|
||||
season: 1
|
||||
episode: 16
|
||||
other: Read NFO
|
||||
screen_size: 720p
|
||||
source: HDTV
|
||||
video_codec: H.264
|
||||
release_group: KILLERS
|
||||
type: episode
|
||||
|
||||
? /mnt/NAS/NoSubsTVShows/Babylon 5/Season 01/Ep. 02 - Soul Hunter
|
||||
: title: Babylon 5
|
||||
season: 1
|
||||
episode: 2
|
||||
episode_title: Soul Hunter
|
||||
type: episode
|
||||
|
||||
? This.is.Us.S01E01.HDTV.x264-KILLERS.mkv
|
||||
: title: This is Us
|
||||
season: 1
|
||||
episode: 1
|
||||
source: HDTV
|
||||
video_codec: H.264
|
||||
release_group: KILLERS
|
||||
container: mkv
|
||||
type: episode
|
||||
|
||||
? Videos/Office1080/The Office (US) (2005) Season 2 S02 + Extras (1080p AMZN WEB-DL x265 HEVC 10bit AAC 2.0 LION)/The Office (US) (2005) - S02E12 - The Injury (1080p AMZN WEB-DL x265 LION).mkv
|
||||
: title: The Office
|
||||
country: US
|
||||
year: 2005
|
||||
season: 2
|
||||
other: Extras
|
||||
screen_size: 1080p
|
||||
streaming_service: Amazon Prime
|
||||
source: Web
|
||||
video_codec: H.265
|
||||
video_profile: High Efficiency Video Coding
|
||||
color_depth: 10-bit
|
||||
audio_codec: AAC
|
||||
audio_channels: '2.0'
|
||||
release_group: LION
|
||||
episode: 12
|
||||
episode_title: The Injury
|
||||
container: mkv
|
||||
type: episode
|
||||
|
||||
? Thumping.Spike.2.E01.DF.WEBRip.720p-DRAMATV.mp4
|
||||
: title: Thumping Spike 2
|
||||
episode: 1
|
||||
source: Web
|
||||
other: Rip
|
||||
screen_size: 720p
|
||||
streaming_service: DramaFever
|
||||
release_group: DRAMATV
|
||||
container: mp4
|
||||
mimetype: video/mp4
|
||||
type: episode
|
||||
|
||||
? About.Time.E01.1080p.VIKI.WEB-DL-BLUEBERRY.mp4
|
||||
: title: About Time
|
||||
episode: 1
|
||||
screen_size: 1080p
|
||||
streaming_service: Viki
|
||||
source: Web
|
||||
release_group: BLUEBERRY
|
||||
container: mp4
|
||||
mimetype: video/mp4
|
||||
type: episode
|
||||
|
||||
? Eyes.Of.Dawn.1991.E01.480p.MBCVOD.AAC.x264-NOGPR.mp4
|
||||
: title: Eyes Of Dawn
|
||||
year: 1991
|
||||
season: 1991
|
||||
episode: 1
|
||||
screen_size: 480p
|
||||
streaming_service: MBC
|
||||
audio_codec: AAC
|
||||
video_codec: H.264
|
||||
release_group: NOGPR
|
||||
container: mp4
|
||||
mimetype: video/mp4
|
||||
type: episode
|
|
@ -815,10 +815,12 @@
|
|||
? Das.Appartement.German.AC3D.DL.720p.BluRay.x264-TVP
|
||||
: audio_codec: Dolby Digital
|
||||
source: Blu-ray
|
||||
language: mul
|
||||
language:
|
||||
- German
|
||||
- Multi
|
||||
release_group: TVP
|
||||
screen_size: 720p
|
||||
title: Das Appartement German
|
||||
title: Das Appartement
|
||||
type: movie
|
||||
video_codec: H.264
|
||||
|
||||
|
@ -1723,7 +1725,7 @@
|
|||
? Ant-Man.and.the.Wasp.2018.Digital.Extras.1080p.AMZN.WEB-DL.DDP5.1.H.264-NTG.mkv
|
||||
: title: Ant-Man and the Wasp
|
||||
year: 2018
|
||||
alternative_title: Digital Extras
|
||||
other: Extras
|
||||
screen_size: 1080p
|
||||
streaming_service: Amazon Prime
|
||||
source: Web
|
||||
|
@ -1770,4 +1772,15 @@
|
|||
audio_channels: '5.1'
|
||||
video_codec: H.264
|
||||
release_group: CMRG
|
||||
type: movie
|
||||
type: movie
|
||||
|
||||
? The.Girl.in.the.Spiders.Web.2019.1080p.WEB-DL.x264.AC3-EVO.mkv
|
||||
: title: The Girl in the Spiders Web
|
||||
year: 2019
|
||||
screen_size: 1080p
|
||||
source: Web
|
||||
video_codec: H.264
|
||||
audio_codec: Dolby Digital
|
||||
release_group: EVO
|
||||
container: mkv
|
||||
type: movie
|
||||
|
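The language edits in this file and in the episode cases above encode one behavioural change: a release tagged DL (dual language) next to an explicit language now reports both that language and Multi, instead of the bare 'mul' code, and the language token no longer leaks into the title. Reproducing the Das.Appartement entry:

    from guessit import guessit

    info = guessit('Das.Appartement.German.AC3D.DL.720p.BluRay.x264-TVP')
    print(info['title'])     # 'Das Appartement'
    print(info['language'])  # German and Multi, as babelfish Language objects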
|
467
libs/common/guessit/test/rules/common_words.yml
Normal file
|
@ -0,0 +1,467 @@
|
|||
? is
|
||||
: title: is
|
||||
|
||||
? it
|
||||
: title: it
|
||||
|
||||
? am
|
||||
: title: am
|
||||
|
||||
? mad
|
||||
: title: mad
|
||||
|
||||
? men
|
||||
: title: men
|
||||
|
||||
? man
|
||||
: title: man
|
||||
|
||||
? run
|
||||
: title: run
|
||||
|
||||
? sin
|
||||
: title: sin
|
||||
|
||||
? st
|
||||
: title: st
|
||||
|
||||
? to
|
||||
: title: to
|
||||
|
||||
? 'no'
|
||||
: title: 'no'
|
||||
|
||||
? non
|
||||
: title: non
|
||||
|
||||
? war
|
||||
: title: war
|
||||
|
||||
? min
|
||||
: title: min
|
||||
|
||||
? new
|
||||
: title: new
|
||||
|
||||
? car
|
||||
: title: car
|
||||
|
||||
? day
|
||||
: title: day
|
||||
|
||||
? bad
|
||||
: title: bad
|
||||
|
||||
? bat
|
||||
: title: bat
|
||||
|
||||
? fan
|
||||
: title: fan
|
||||
|
||||
? fry
|
||||
: title: fry
|
||||
|
||||
? cop
|
||||
: title: cop
|
||||
|
||||
? zen
|
||||
: title: zen
|
||||
|
||||
? gay
|
||||
: title: gay
|
||||
|
||||
? fat
|
||||
: title: fat
|
||||
|
||||
? one
|
||||
: title: one
|
||||
|
||||
? cherokee
|
||||
: title: cherokee
|
||||
|
||||
? got
|
||||
: title: got
|
||||
|
||||
? an
|
||||
: title: an
|
||||
|
||||
? as
|
||||
: title: as
|
||||
|
||||
? cat
|
||||
: title: cat
|
||||
|
||||
? her
|
||||
: title: her
|
||||
|
||||
? be
|
||||
: title: be
|
||||
|
||||
? hat
|
||||
: title: hat
|
||||
|
||||
? sun
|
||||
: title: sun
|
||||
|
||||
? may
|
||||
: title: may
|
||||
|
||||
? my
|
||||
: title: my
|
||||
|
||||
? mr
|
||||
: title: mr
|
||||
|
||||
? rum
|
||||
: title: rum
|
||||
|
||||
? pi
|
||||
: title: pi
|
||||
|
||||
? bb
|
||||
: title: bb
|
||||
|
||||
? bt
|
||||
: title: bt
|
||||
|
||||
? tv
|
||||
: title: tv
|
||||
|
||||
? aw
|
||||
: title: aw
|
||||
|
||||
? by
|
||||
: title: by
|
||||
|
||||
? md
|
||||
: other: Mic Dubbed
|
||||
|
||||
? mp
|
||||
: title: mp
|
||||
|
||||
? cd
|
||||
: title: cd
|
||||
|
||||
? in
|
||||
: title: in
|
||||
|
||||
? ad
|
||||
: title: ad
|
||||
|
||||
? ice
|
||||
: title: ice
|
||||
|
||||
? ay
|
||||
: title: ay
|
||||
|
||||
? at
|
||||
: title: at
|
||||
|
||||
? star
|
||||
: title: star
|
||||
|
||||
? so
|
||||
: title: so
|
||||
|
||||
? he
|
||||
: title: he
|
||||
|
||||
? do
|
||||
: title: do
|
||||
|
||||
? ax
|
||||
: title: ax
|
||||
|
||||
? mx
|
||||
: title: mx
|
||||
|
||||
? bas
|
||||
: title: bas
|
||||
|
||||
? de
|
||||
: title: de
|
||||
|
||||
? le
|
||||
: title: le
|
||||
|
||||
? son
|
||||
: title: son
|
||||
|
||||
? ne
|
||||
: title: ne
|
||||
|
||||
? ca
|
||||
: title: ca
|
||||
|
||||
? ce
|
||||
: title: ce
|
||||
|
||||
? et
|
||||
: title: et
|
||||
|
||||
? que
|
||||
: title: que
|
||||
|
||||
? mal
|
||||
: title: mal
|
||||
|
||||
? est
|
||||
: title: est
|
||||
|
||||
? vol
|
||||
: title: vol
|
||||
|
||||
? or
|
||||
: title: or
|
||||
|
||||
? mon
|
||||
: title: mon
|
||||
|
||||
? se
|
||||
: title: se
|
||||
|
||||
? je
|
||||
: title: je
|
||||
|
||||
? tu
|
||||
: title: tu
|
||||
|
||||
? me
|
||||
: title: me
|
||||
|
||||
? ma
|
||||
: title: ma
|
||||
|
||||
? va
|
||||
: title: va
|
||||
|
||||
? au
|
||||
: country: AU
|
||||
|
||||
? lu
|
||||
: title: lu
|
||||
|
||||
? wa
|
||||
: title: wa
|
||||
|
||||
? ga
|
||||
: title: ga
|
||||
|
||||
? ao
|
||||
: title: ao
|
||||
|
||||
? la
|
||||
: title: la
|
||||
|
||||
? el
|
||||
: title: el
|
||||
|
||||
? del
|
||||
: title: del
|
||||
|
||||
? por
|
||||
: title: por
|
||||
|
||||
? mar
|
||||
: title: mar
|
||||
|
||||
? al
|
||||
: title: al
|
||||
|
||||
? un
|
||||
: title: un
|
||||
|
||||
? ind
|
||||
: title: ind
|
||||
|
||||
? arw
|
||||
: title: arw
|
||||
|
||||
? ts
|
||||
: source: Telesync
|
||||
|
||||
? ii
|
||||
: title: ii
|
||||
|
||||
? bin
|
||||
: title: bin
|
||||
|
||||
? chan
|
||||
: title: chan
|
||||
|
||||
? ss
|
||||
: title: ss
|
||||
|
||||
? san
|
||||
: title: san
|
||||
|
||||
? oss
|
||||
: title: oss
|
||||
|
||||
? iii
|
||||
: title: iii
|
||||
|
||||
? vi
|
||||
: title: vi
|
||||
|
||||
? ben
|
||||
: title: ben
|
||||
|
||||
? da
|
||||
: title: da
|
||||
|
||||
? lt
|
||||
: title: lt
|
||||
|
||||
? ch
|
||||
: title: ch
|
||||
|
||||
? sr
|
||||
: title: sr
|
||||
|
||||
? ps
|
||||
: title: ps
|
||||
|
||||
? cx
|
||||
: title: cx
|
||||
|
||||
? vo
|
||||
: title: vo
|
||||
|
||||
? mkv
|
||||
: container: mkv
|
||||
|
||||
? avi
|
||||
: container: avi
|
||||
|
||||
? dmd
|
||||
: title: dmd
|
||||
|
||||
? the
|
||||
: title: the
|
||||
|
||||
? dis
|
||||
: title: dis
|
||||
|
||||
? cut
|
||||
: title: cut
|
||||
|
||||
? stv
|
||||
: title: stv
|
||||
|
||||
? des
|
||||
: title: des
|
||||
|
||||
? dia
|
||||
: title: dia
|
||||
|
||||
? and
|
||||
: title: and
|
||||
|
||||
? cab
|
||||
: title: cab
|
||||
|
||||
? sub
|
||||
: title: sub
|
||||
|
||||
? mia
|
||||
: title: mia
|
||||
|
||||
? rim
|
||||
: title: rim
|
||||
|
||||
? las
|
||||
: title: las
|
||||
|
||||
? une
|
||||
: title: une
|
||||
|
||||
? par
|
||||
: title: par
|
||||
|
||||
? srt
|
||||
: container: srt
|
||||
|
||||
? ano
|
||||
: title: ano
|
||||
|
||||
? toy
|
||||
: title: toy
|
||||
|
||||
? job
|
||||
: title: job
|
||||
|
||||
? gag
|
||||
: title: gag
|
||||
|
||||
? reel
|
||||
: title: reel
|
||||
|
||||
? www
|
||||
: title: www
|
||||
|
||||
? for
|
||||
: title: for
|
||||
|
||||
? ayu
|
||||
: title: ayu
|
||||
|
||||
? csi
|
||||
: title: csi
|
||||
|
||||
? ren
|
||||
: title: ren
|
||||
|
||||
? moi
|
||||
: title: moi
|
||||
|
||||
? sur
|
||||
: title: sur
|
||||
|
||||
? fer
|
||||
: title: fer
|
||||
|
||||
? fun
|
||||
: title: fun
|
||||
|
||||
? two
|
||||
: title: two
|
||||
|
||||
? big
|
||||
: title: big
|
||||
|
||||
? psy
|
||||
: title: psy
|
||||
|
||||
? air
|
||||
: title: air
|
||||
|
||||
? brazil
|
||||
: title: brazil
|
||||
|
||||
? jordan
|
||||
: title: jordan
|
||||
|
||||
? bs
|
||||
: title: bs
|
||||
|
||||
? kz
|
||||
: title: kz
|
||||
|
||||
? gt
|
||||
: title: gt
|
||||
|
||||
? im
|
||||
: title: im
|
||||
|
||||
? pt
|
||||
: language: pt
|
||||
|
||||
? scr
|
||||
: title: scr
|
||||
|
||||
? sd
|
||||
: title: sd
|
||||
|
||||
? hr
|
||||
: other: High Resolution
|
|
@ -5,8 +5,8 @@
|
|||
: country: US
|
||||
title: this is title
|
||||
|
||||
? This.is.us.title
|
||||
: title: This is us title
|
||||
? This.is.Us
|
||||
: title: This is Us
|
||||
|
||||
? This.Is.Us
|
||||
: options: --no-default-config
|
||||
|
|
|
@ -48,7 +48,7 @@
|
|||
proper_count: 3
|
||||
|
||||
|
||||
? Proper
|
||||
? Proper.720p
|
||||
? +Repack
|
||||
? +Rerip
|
||||
: other: Proper
|
||||
|
@ -80,7 +80,7 @@
|
|||
? Remux
|
||||
: other: Remux
|
||||
|
||||
? 3D
|
||||
? 3D.2019
|
||||
: other: 3D
|
||||
|
||||
? HD
|
||||
|
|
21
libs/common/guessit/test/suggested.json
Normal file
|
@ -0,0 +1,21 @@
|
|||
{
|
||||
"titles": [
|
||||
"13 Reasons Why",
|
||||
"Star Wars: Episode VII - The Force Awakens",
|
||||
"3%",
|
||||
"The 100",
|
||||
"3 Percent",
|
||||
"This is Us",
|
||||
"Open Season 2",
|
||||
"Game of Thrones",
|
||||
"The X-Files",
|
||||
"11.22.63"
|
||||
],
|
||||
"suggested": [
|
||||
"13 Reasons Why",
|
||||
"Star Wars: Episode VII - The Force Awakens",
|
||||
"The 100",
|
||||
"Open Season 2",
|
||||
"11.22.63"
|
||||
]
|
||||
}
|
|
@ -1,13 +1,14 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name, pointless-string-statement
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
import six
|
||||
|
||||
from ..api import guessit, properties, GuessitException
|
||||
from ..api import guessit, properties, suggested_expected, GuessitException
|
||||
|
||||
__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
|
||||
|
||||
|
@ -27,12 +28,16 @@ def test_forced_binary():
|
|||
assert ret and 'title' in ret and isinstance(ret['title'], six.binary_type)
|
||||
|
||||
|
||||
@pytest.mark.skipif('sys.version_info < (3, 4)', reason="Path is not available")
|
||||
@pytest.mark.skipif(sys.version_info < (3, 4), reason="Path is not available")
|
||||
def test_pathlike_object():
|
||||
from pathlib import Path
|
||||
path = Path('Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv')
|
||||
ret = guessit(path)
|
||||
assert ret and 'title' in ret
|
||||
try:
|
||||
from pathlib import Path
|
||||
|
||||
path = Path('Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv')
|
||||
ret = guessit(path)
|
||||
assert ret and 'title' in ret
|
||||
except ImportError: # pragma: no-cover
|
||||
pass
|
||||
|
||||
|
||||
def test_unicode_japanese():
|
||||
|
@ -69,3 +74,10 @@ def test_exception():
|
|||
assert "An internal error has occured in guessit" in str(excinfo.value)
|
||||
assert "Guessit Exception Report" in str(excinfo.value)
|
||||
assert "Please report at https://github.com/guessit-io/guessit/issues" in str(excinfo.value)
|
||||
|
||||
|
||||
def test_suggested_expected():
|
||||
with open(os.path.join(__location__, 'suggested.json'), 'r') as f:
|
||||
content = json.load(f)
|
||||
actual = suggested_expected(content['titles'])
|
||||
assert actual == content['suggested']
|
||||
|
|
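test_suggested_expected drives the new guessit.api.suggested_expected helper which, as I read it, filters a list of known titles down to the ones a caller should pass back to guessit as expected_title hints because a plain parse would mangle them (dotted dates, leading numbers, and so on). Using values from the new suggested.json:

    from guessit.api import suggested_expected

    titles = ['13 Reasons Why', 'This is Us', 'The 100', '11.22.63']
    # per suggested.json, 'This is Us' parses cleanly on its own and is
    # therefore not suggested; the other three are returned
    print(suggested_expected(titles))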
|
@ -7,9 +7,8 @@ import os
|
|||
from io import open # pylint: disable=redefined-builtin
|
||||
|
||||
import babelfish
|
||||
import pytest
|
||||
import six
|
||||
import yaml
|
||||
import six # pylint:disable=wrong-import-order
|
||||
import yaml # pylint:disable=wrong-import-order
|
||||
from rebulk.remodule import re
|
||||
from rebulk.utils import is_iterable
|
||||
|
||||
|
@ -21,13 +20,6 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
|
||||
|
||||
filename_predicate = None
|
||||
string_predicate = None
|
||||
|
||||
|
||||
# filename_predicate = lambda filename: 'episode_title' in filename
|
||||
# string_predicate = lambda string: '-DVD.BlablaBla.Fix.Blablabla.XVID' in string
|
||||
|
||||
|
||||
class EntryResult(object):
|
||||
def __init__(self, string, negates=False):
|
||||
|
@ -134,7 +126,49 @@ class TestYml(object):
|
|||
|
||||
options_re = re.compile(r'^([ +-]+)(.*)')
|
||||
|
||||
files, ids = files_and_ids(filename_predicate)
|
||||
def _get_unique_id(self, collection, base_id):
|
||||
ret = base_id
|
||||
i = 2
|
||||
while ret in collection:
|
||||
suffix = "-" + str(i)
|
||||
ret = base_id + suffix
|
||||
i += 1
|
||||
return ret
|
||||
|
||||
def pytest_generate_tests(self, metafunc):
|
||||
if 'yml_test_case' in metafunc.fixturenames:
|
||||
entries = []
|
||||
entry_ids = []
|
||||
entry_set = set()
|
||||
|
||||
for filename, _ in zip(*files_and_ids()):
|
||||
with open(os.path.join(__location__, filename), 'r', encoding='utf-8') as infile:
|
||||
data = yaml.load(infile, OrderedDictYAMLLoader)
|
||||
|
||||
last_expected = None
|
||||
for string, expected in reversed(list(data.items())):
|
||||
if expected is None:
|
||||
data[string] = last_expected
|
||||
else:
|
||||
last_expected = expected
|
||||
|
||||
default = None
|
||||
try:
|
||||
default = data['__default__']
|
||||
del data['__default__']
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
for string, expected in data.items():
|
||||
TestYml.set_default(expected, default)
|
||||
string = TestYml.fix_encoding(string, expected)
|
||||
|
||||
entries.append((filename, string, expected))
|
||||
unique_id = self._get_unique_id(entry_set, '[' + filename + '] ' + str(string))
|
||||
entry_set.add(unique_id)
|
||||
entry_ids.append(unique_id)
|
||||
|
||||
metafunc.parametrize('yml_test_case', entries, ids=entry_ids)
|
||||
|
||||
@staticmethod
|
||||
def set_default(expected, default):
|
||||
|
@ -143,34 +177,8 @@ class TestYml(object):
|
|||
if k not in expected:
|
||||
expected[k] = v
|
||||
|
||||
@pytest.mark.parametrize('filename', files, ids=ids)
|
||||
def test(self, filename, caplog):
|
||||
caplog.set_level(logging.INFO)
|
||||
with open(os.path.join(__location__, filename), 'r', encoding='utf-8') as infile:
|
||||
data = yaml.load(infile, OrderedDictYAMLLoader)
|
||||
entries = Results()
|
||||
|
||||
last_expected = None
|
||||
for string, expected in reversed(list(data.items())):
|
||||
if expected is None:
|
||||
data[string] = last_expected
|
||||
else:
|
||||
last_expected = expected
|
||||
|
||||
default = None
|
||||
try:
|
||||
default = data['__default__']
|
||||
del data['__default__']
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
for string, expected in data.items():
|
||||
TestYml.set_default(expected, default)
|
||||
entry = self.check_data(filename, string, expected)
|
||||
entries.append(entry)
|
||||
entries.assert_ok()
|
||||
|
||||
def check_data(self, filename, string, expected):
|
||||
@classmethod
|
||||
def fix_encoding(cls, string, expected):
|
||||
if six.PY2:
|
||||
if isinstance(string, six.text_type):
|
||||
string = string.encode('utf-8')
|
||||
|
@ -183,16 +191,23 @@ class TestYml(object):
|
|||
expected[k] = v
|
||||
if not isinstance(string, str):
|
||||
string = str(string)
|
||||
if not string_predicate or string_predicate(string): # pylint: disable=not-callable
|
||||
entry = self.check(string, expected)
|
||||
if entry.ok:
|
||||
logger.debug('[%s] %s', filename, entry)
|
||||
elif entry.warning:
|
||||
logger.warning('[%s] %s', filename, entry)
|
||||
elif entry.error:
|
||||
logger.error('[%s] %s', filename, entry)
|
||||
for line in entry.details:
|
||||
logger.error('[%s] %s', filename, ' ' * 4 + line)
|
||||
return string
|
||||
|
||||
def test_entry(self, yml_test_case):
|
||||
filename, string, expected = yml_test_case
|
||||
result = self.check_data(filename, string, expected)
|
||||
assert not result.error
|
||||
|
||||
def check_data(self, filename, string, expected):
|
||||
entry = self.check(string, expected)
|
||||
if entry.ok:
|
||||
logger.debug('[%s] %s', filename, entry)
|
||||
elif entry.warning:
|
||||
logger.warning('[%s] %s', filename, entry)
|
||||
elif entry.error:
|
||||
logger.error('[%s] %s', filename, entry)
|
||||
for line in entry.details:
|
||||
logger.error('[%s] %s', filename, ' ' * 4 + line)
|
||||
return entry
|
||||
|
||||
def check(self, string, expected):
|
||||
|
|
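The test_yml.py rewrite above replaces one monolithic per-file test with one pytest test per YAML entry, using the standard pytest_generate_tests hook: the hook notices that test_entry takes a yml_test_case argument and parametrizes it with (filename, string, expected) tuples and de-duplicated ids. A stripped-down sketch of the same mechanism, with generic names:

    # minimal pytest_generate_tests sketch (module-level form)
    def pytest_generate_tests(metafunc):
        if 'case' in metafunc.fixturenames:
            cases = [('a', 1), ('b', 2)]
            metafunc.parametrize('case', cases, ids=['case-a', 'case-b'])

    def test_case(case):
        name, value = case
        assert isinstance(value, int)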
|
@ -946,3 +946,254 @@
|
|||
source: Blu-ray
|
||||
audio_codec: DTS-HD
|
||||
type: movie
|
||||
|
||||
? Mr Robot - S03E01 - eps3 0 power-saver-mode h (1080p AMZN WEB-DL x265 HEVC 10bit EAC3 6.0 RCVR).mkv
|
||||
: title: Mr Robot
|
||||
season: 3
|
||||
episode: 1
|
||||
episode_title: eps3 0 power-saver-mode h
|
||||
screen_size: 1080p
|
||||
streaming_service: Amazon Prime
|
||||
source: Web
|
||||
video_codec: H.265
|
||||
video_profile: High Efficiency Video Coding
|
||||
color_depth: 10-bit
|
||||
audio_codec: Dolby Digital Plus
|
||||
audio_channels: '5.1'
|
||||
release_group: RCVR
|
||||
container: mkv
|
||||
type: episode
|
||||
|
||||
? Panorama.15-05-2018.Web-DL.540p.H264.AAC.Subs.mp4
|
||||
: title: Panorama
|
||||
date: 2018-05-15
|
||||
source: Web
|
||||
screen_size: 540p
|
||||
video_codec: H.264
|
||||
audio_codec: AAC
|
||||
subtitle_language: und
|
||||
container: mp4
|
||||
type: episode
|
||||
|
||||
? Shaolin 2011.720p.BluRay.x264-x0r.mkv
|
||||
: title: Shaolin
|
||||
year: 2011
|
||||
screen_size: 720p
|
||||
source: Blu-ray
|
||||
video_codec: H.264
|
||||
release_group: x0r
|
||||
container: mkv
|
||||
type: movie
|
||||
|
||||
? '[ Engineering Catastrophes S02E10 1080p AMZN WEB-DL DD+ 2.0 x264-TrollHD ]'
|
||||
: title: Engineering Catastrophes
|
||||
season: 2
|
||||
episode: 10
|
||||
screen_size: 1080p
|
||||
streaming_service: Amazon Prime
|
||||
source: Web
|
||||
audio_codec: Dolby Digital Plus
|
||||
audio_channels: '2.0'
|
||||
video_codec: H.264
|
||||
release_group: TrollHD
|
||||
type: episode
|
||||
|
||||
? A Very Harold & Kumar 3D Christmas (2011).mkv
|
||||
: title: A Very Harold & Kumar 3D Christmas
|
||||
year: 2011
|
||||
container: mkv
|
||||
type: movie
|
||||
|
||||
? Cleveland.Hustles.S01E03.Downward.Dogs.and.Proper.Pigs.720p.HDTV.x264-W4F
|
||||
: title: Cleveland Hustles
|
||||
season: 1
|
||||
episode: 3
|
||||
episode_title: Downward Dogs and Proper Pigs
|
||||
screen_size: 720p
|
||||
source: HDTV
|
||||
video_codec: H.264
|
||||
release_group: W4F
|
||||
type: episode
|
||||
|
||||
? Pawn.Stars.S12E20.The.Pawn.Awakens.REAL.READ.NFO.720p.HDTV.x264-DHD
|
||||
: title: Pawn Stars
|
||||
season: 12
|
||||
episode: 20
|
||||
episode_title: The Pawn Awakens
|
||||
other:
|
||||
- Proper
|
||||
- Read NFO
|
||||
proper_count: 2
|
||||
screen_size: 720p
|
||||
source: HDTV
|
||||
video_codec: H.264
|
||||
release_group: DHD
|
||||
type: episode
|
||||
|
||||
? Pawn.Stars.S12E22.Racing.Revolution.REAL.720p.HDTV.x264-DHD
|
||||
: title: Pawn Stars
|
||||
season: 12
|
||||
episode: 22
|
||||
episode_title: Racing Revolution
|
||||
other: Proper
|
||||
proper_count: 2
|
||||
screen_size: 720p
|
||||
source: HDTV
|
||||
video_codec: H.264
|
||||
release_group: DHD
|
||||
type: episode
|
||||
|
||||
? Luksusfellen.S18E02.REAL.NORWEGiAN.720p.WEB.h264-NORPiLT
|
||||
: title: Luksusfellen
|
||||
season: 18
|
||||
episode: 2
|
||||
other: Proper
|
||||
proper_count: 2
|
||||
language: Norwegian
|
||||
screen_size: 720p
|
||||
source: Web
|
||||
video_codec: H.264
|
||||
release_group: NORPiLT
|
||||
type: episode
|
||||
|
||||
? The.Exorcist.S02E07.REAL.FRENCH.720p.HDTV.x264-SH0W
|
||||
: title: The Exorcist
|
||||
season: 2
|
||||
episode: 7
|
||||
other: Proper
|
||||
proper_count: 2
|
||||
language: fr
|
||||
screen_size: 720p
|
||||
source: HDTV
|
||||
video_codec: H.264
|
||||
release_group: SH0W
|
||||
type: episode
|
||||
|
||||
? Outrageous.Acts.of.Science.S05E02.Is.This.for.Real.720p.HDTV.x264-DHD
|
||||
: title: Outrageous Acts of Science
|
||||
season: 5
|
||||
episode: 2
|
||||
# corner case
|
||||
# episode_title: Is This for Real
|
||||
screen_size: 720p
|
||||
source: HDTV
|
||||
video_codec: H.264
|
||||
release_group: DHD
|
||||
type: episode
|
||||
|
||||
? How.the.Universe.Works.S06E08.Strange.Lives.of.Dwarf.Planets.REAL.720p.WEB.x264-DHD
|
||||
: title: How the Universe Works
|
||||
season: 6
|
||||
episode: 8
|
||||
episode_title: Strange Lives of Dwarf Planets
|
||||
other: Proper
|
||||
proper_count: 2
|
||||
screen_size: 720p
|
||||
source: Web
|
||||
video_codec: H.264
|
||||
release_group: DHD
|
||||
type: episode
|
||||
|
||||
? Vampirina.S01E16.REAL.HDTV.x264-W4F
|
||||
: title: Vampirina
|
||||
season: 1
|
||||
episode: 16
|
||||
other: Proper
|
||||
proper_count: 2
|
||||
source: HDTV
|
||||
video_codec: H.264
|
||||
release_group: W4F
|
||||
type: episode
|
||||
|
||||
? Test.S01E16.Some Real Episode Title.HDTV.x264-W4F
|
||||
: title: Test
|
||||
season: 1
|
||||
episode: 16
|
||||
episode_title: Some Real Episode Title
|
||||
source: HDTV
|
||||
video_codec: H.264
|
||||
release_group: W4F
|
||||
type: episode
|
||||
|
||||
? NOS4A2.S01E01.The.Shorter.Way.REPACK.720p.AMZN.WEB-DL.DDP5.1.H.264-NTG.mkv
|
||||
: title: NOS4A2
|
||||
season: 1
|
||||
episode: 1
|
||||
episode_title: The Shorter Way
|
||||
other: Proper
|
||||
proper_count: 1
|
||||
screen_size: 720p
|
||||
streaming_service: Amazon Prime
|
||||
source: Web
|
||||
audio_codec: Dolby Digital Plus
|
||||
audio_channels: '5.1'
|
||||
video_codec: H.264
|
||||
release_group: NTG
|
||||
container: mkv
|
||||
type: episode
|
||||
|
||||
? Star Trek DS9 Ep 2x03 The Siege (Part III)
|
||||
: title: Star Trek DS9
|
||||
season: 2
|
||||
episode: 3
|
||||
episode_title: The Siege
|
||||
part: 3
|
||||
type: episode
|
||||
|
||||
? The.Red.Line.S01E01
|
||||
: title: The Red Line
|
||||
season: 1
|
||||
episode: 1
|
||||
type: episode
|
||||
|
||||
? Show.S01E01.WEB.x264-METCON.mkv
|
||||
: title: Show
|
||||
season: 1
|
||||
episode: 1
|
||||
source: Web
|
||||
video_codec: H.264
|
||||
release_group: METCON
|
||||
container: mkv
|
||||
type: episode
|
||||
|
||||
? Show.S01E01.WEB.x264-TCMEON.mkv
|
||||
: title: Show
|
||||
season: 1
|
||||
episode: 1
|
||||
source: Web
|
||||
video_codec: H.264
|
||||
release_group: TCMEON
|
||||
container: mkv
|
||||
type: episode
|
||||
|
||||
? Show.S01E01.WEB.x264-MEONTC.mkv
|
||||
: title: Show
|
||||
season: 1
|
||||
episode: 1
|
||||
source: Web
|
||||
video_codec: H.264
|
||||
release_group: MEONTC
|
||||
container: mkv
|
||||
type: episode
|
||||
|
||||
? '[TorrentCouch.com].Westworld.S02.Complete.720p.WEB-DL.x264.[MP4].[5.3GB].[Season.2.Full]/[TorrentCouch.com].Westworld.S02E03.720p.WEB-DL.x264.mp4'
|
||||
: website: TorrentCouch.com
|
||||
title: Westworld
|
||||
season: 2
|
||||
other: Complete
|
||||
screen_size: 720p
|
||||
source: Web
|
||||
video_codec: H.264
|
||||
container: mp4
|
||||
size: 5.3GB
|
||||
episode: 3
|
||||
type: episode
|
||||
|
||||
? Vita.&.Virginia.2018.720p.H.264.YTS.LT.mp4
|
||||
: title: Vita & Virginia
|
||||
year: 2018
|
||||
screen_size: 720p
|
||||
video_codec: H.264
|
||||
release_group: YTS.LT
|
||||
container: mp4
|
||||
type: movie
|
|
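A theme in the block above: the REAL tag now maps to other: Proper and counts double toward proper_count (the Pawn Stars REAL.READ.NFO case reaches 2 with a single REAL), whereas a lone REPACK, as in the NOS4A2 case, stays at proper_count 1. Spot-checking one entry:

    from guessit import guessit

    info = guessit('Vampirina.S01E16.REAL.HDTV.x264-W4F')
    assert info['other'] == 'Proper'
    assert info['proper_count'] == 2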
@ -10,19 +10,19 @@ except ImportError: # pragma: no-cover
|
|||
from ordereddict import OrderedDict # pylint:disable=import-error
|
||||
import babelfish
|
||||
|
||||
import yaml
|
||||
import yaml # pylint:disable=wrong-import-order
|
||||
|
||||
from .rules.common.quantity import BitRate, FrameRate, Size
|
||||
|
||||
|
||||
class OrderedDictYAMLLoader(yaml.Loader):
|
||||
class OrderedDictYAMLLoader(yaml.SafeLoader):
|
||||
"""
|
||||
A YAML loader that loads mappings into ordered dictionaries.
|
||||
From https://gist.github.com/enaeseth/844388
|
||||
"""
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
yaml.Loader.__init__(self, *args, **kwargs)
|
||||
yaml.SafeLoader.__init__(self, *args, **kwargs)
|
||||
|
||||
self.add_constructor(u'tag:yaml.org,2002:map', type(self).construct_yaml_map)
|
||||
self.add_constructor(u'tag:yaml.org,2002:omap', type(self).construct_yaml_map)
|
||||
|
@ -58,7 +58,7 @@ class CustomDumper(yaml.SafeDumper):
|
|||
"""
|
||||
Custom YAML Dumper.
|
||||
"""
|
||||
pass
|
||||
pass # pylint:disable=unnecessary-pass
|
||||
|
||||
|
||||
def default_representer(dumper, data):
|
||||
|
|
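Rebasing OrderedDictYAMLLoader onto yaml.SafeLoader means the test fixtures can no longer construct arbitrary Python objects through !!python/object tags; only the two explicitly re-registered map constructors remain. The same hardening pattern in isolation:

    import yaml

    class StrictLoader(yaml.SafeLoader):
        """SafeLoader subclass: python/object tags are rejected outright."""

    data = yaml.load('a: 1\nb: [2, 3]', Loader=StrictLoader)
    assert data == {'a': 1, 'b': [2, 3]}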
|
@ -4,4 +4,4 @@
|
|||
Version module
|
||||
"""
|
||||
# pragma: no cover
|
||||
__version__ = '1.0.0'
|
||||
__version__ = '2.0.1'
|
||||
|
|
217
libs/common/rebulk/builder.py
Normal file
|
@ -0,0 +1,217 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Base builder class for Rebulk
|
||||
"""
|
||||
from abc import ABCMeta, abstractmethod
|
||||
from copy import deepcopy
|
||||
from logging import getLogger
|
||||
|
||||
from six import add_metaclass
|
||||
|
||||
from .loose import set_defaults
|
||||
from .pattern import RePattern, StringPattern, FunctionalPattern
|
||||
|
||||
log = getLogger(__name__).log
|
||||
|
||||
|
||||
@add_metaclass(ABCMeta)
|
||||
class Builder(object):
|
||||
"""
|
||||
Base builder class for patterns
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self._defaults = {}
|
||||
self._regex_defaults = {}
|
||||
self._string_defaults = {}
|
||||
self._functional_defaults = {}
|
||||
self._chain_defaults = {}
|
||||
|
||||
def reset(self):
|
||||
"""
|
||||
Reset all defaults.
|
||||
|
||||
:return:
|
||||
"""
|
||||
self.__init__()
|
||||
|
||||
def defaults(self, **kwargs):
|
||||
"""
|
||||
Define default keyword arguments for all patterns
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(kwargs, self._defaults, override=True)
|
||||
return self
|
||||
|
||||
def regex_defaults(self, **kwargs):
|
||||
"""
|
||||
Define default keyword arguments for functional patterns.
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(kwargs, self._regex_defaults, override=True)
|
||||
return self
|
||||
|
||||
def string_defaults(self, **kwargs):
|
||||
"""
|
||||
Define default keyword arguments for string patterns.
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(kwargs, self._string_defaults, override=True)
|
||||
return self
|
||||
|
||||
def functional_defaults(self, **kwargs):
|
||||
"""
|
||||
Define default keyword arguments for functional patterns.
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(kwargs, self._functional_defaults, override=True)
|
||||
return self
|
||||
|
||||
def chain_defaults(self, **kwargs):
|
||||
"""
|
||||
Define default keyword arguments for patterns chain.
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(kwargs, self._chain_defaults, override=True)
|
||||
return self
|
||||
|
||||
def build_re(self, *pattern, **kwargs):
|
||||
"""
|
||||
Builds a new regular expression pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(self._regex_defaults, kwargs)
|
||||
set_defaults(self._defaults, kwargs)
|
||||
return RePattern(*pattern, **kwargs)
|
||||
|
||||
def build_string(self, *pattern, **kwargs):
|
||||
"""
|
||||
Builds a new string pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(self._string_defaults, kwargs)
|
||||
set_defaults(self._defaults, kwargs)
|
||||
return StringPattern(*pattern, **kwargs)
|
||||
|
||||
def build_functional(self, *pattern, **kwargs):
|
||||
"""
|
||||
Builds a new functional pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(self._functional_defaults, kwargs)
|
||||
set_defaults(self._defaults, kwargs)
|
||||
return FunctionalPattern(*pattern, **kwargs)
|
||||
|
||||
def build_chain(self, **kwargs):
|
||||
"""
|
||||
Builds a new patterns chain
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
from .chain import Chain
|
||||
set_defaults(self._chain_defaults, kwargs)
|
||||
set_defaults(self._defaults, kwargs)
|
||||
chain = Chain(self, **kwargs)
|
||||
chain._defaults = deepcopy(self._defaults) # pylint: disable=protected-access
|
||||
chain._regex_defaults = deepcopy(self._regex_defaults) # pylint: disable=protected-access
|
||||
chain._functional_defaults = deepcopy(self._functional_defaults) # pylint: disable=protected-access
|
||||
chain._string_defaults = deepcopy(self._string_defaults) # pylint: disable=protected-access
|
||||
chain._chain_defaults = deepcopy(self._chain_defaults) # pylint: disable=protected-access
|
||||
return chain
|
||||
|
||||
@abstractmethod
|
||||
def pattern(self, *pattern):
|
||||
"""
|
||||
Register a list of Pattern instance
|
||||
:param pattern:
|
||||
:return:
|
||||
"""
|
||||
pass
|
||||
|
||||
def regex(self, *pattern, **kwargs):
|
||||
"""
|
||||
Add re pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:return: self
|
||||
:rtype: Rebulk
|
||||
"""
|
||||
return self.pattern(self.build_re(*pattern, **kwargs))
|
||||
|
||||
def string(self, *pattern, **kwargs):
|
||||
"""
|
||||
Add string pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:return: self
|
||||
:rtype: Rebulk
|
||||
"""
|
||||
return self.pattern(self.build_string(*pattern, **kwargs))
|
||||
|
||||
def functional(self, *pattern, **kwargs):
|
||||
"""
|
||||
Add functional pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:return: self
|
||||
:rtype: Rebulk
|
||||
"""
|
||||
functional = self.build_functional(*pattern, **kwargs)
|
||||
return self.pattern(functional)
|
||||
|
||||
def chain(self, **kwargs):
|
||||
"""
|
||||
Add patterns chain, using configuration of this rebulk
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
chain = self.build_chain(**kwargs)
|
||||
self.pattern(chain)
|
||||
return chain
|
|
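builder.py is new with rebulk 2.0.1 and centralizes the defaults plumbing that Rebulk and Chain previously duplicated. The ordering inside build_re/build_string/build_functional is the part worth noting: kind-specific defaults fill missing kwargs first, then the global defaults fill what is still missing, and anything passed explicitly on the call is never overwritten. A small sketch against the public API (pattern and names invented):

    from rebulk import Rebulk

    rebulk = Rebulk()
    rebulk.defaults(private=True)              # global default
    rebulk.regex_defaults(abbreviations=[])    # regex-only default
    # private=False wins here: set_defaults only fills keys that are
    # absent from the call's kwargs
    rebulk.regex(r'\d{4}', name='year', private=False)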
@ -6,9 +6,10 @@ Chain patterns and handle repetiting capture group
|
|||
# pylint: disable=super-init-not-called
|
||||
import itertools
|
||||
|
||||
from .loose import call, set_defaults
|
||||
from .builder import Builder
|
||||
from .loose import call
|
||||
from .match import Match, Matches
|
||||
from .pattern import Pattern, filter_match_kwargs
|
||||
from .pattern import Pattern, filter_match_kwargs, BasePattern
|
||||
from .remodule import re
|
||||
|
||||
|
||||
|
@ -19,150 +20,46 @@ class _InvalidChainException(Exception):
|
|||
pass
|
||||
|
||||
|
||||
class Chain(Pattern):
|
||||
class Chain(Pattern, Builder):
|
||||
"""
|
||||
Definition of a pattern chain to search for.
|
||||
"""
|
||||
|
||||
def __init__(self, rebulk, chain_breaker=None, **kwargs):
|
||||
call(super(Chain, self).__init__, **kwargs)
|
||||
def __init__(self, parent, chain_breaker=None, **kwargs):
|
||||
Builder.__init__(self)
|
||||
call(Pattern.__init__, self, **kwargs)
|
||||
self._kwargs = kwargs
|
||||
self._match_kwargs = filter_match_kwargs(kwargs)
|
||||
self._defaults = {}
|
||||
self._regex_defaults = {}
|
||||
self._string_defaults = {}
|
||||
self._functional_defaults = {}
|
||||
if callable(chain_breaker):
|
||||
self.chain_breaker = chain_breaker
|
||||
else:
|
||||
self.chain_breaker = None
|
||||
self.rebulk = rebulk
|
||||
self.parent = parent
|
||||
self.parts = []
|
||||
|
||||
def defaults(self, **kwargs):
|
||||
def pattern(self, *pattern):
|
||||
"""
|
||||
Define default keyword arguments for all patterns
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
self._defaults = kwargs
|
||||
return self
|
||||
|
||||
def regex_defaults(self, **kwargs):
|
||||
"""
|
||||
Define default keyword arguments for functional patterns.
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
self._regex_defaults = kwargs
|
||||
return self
|
||||
|
||||
def string_defaults(self, **kwargs):
|
||||
"""
|
||||
Define default keyword arguments for string patterns.
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
self._string_defaults = kwargs
|
||||
return self
|
||||
|
||||
def functional_defaults(self, **kwargs):
|
||||
"""
|
||||
Define default keyword arguments for functional patterns.
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
self._functional_defaults = kwargs
|
||||
return self
|
||||
|
||||
def chain(self):
|
||||
"""
|
||||
Add patterns chain, using configuration from this chain
|
||||
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
# pylint: disable=protected-access
|
||||
chain = self.rebulk.chain(**self._kwargs)
|
||||
chain._defaults = dict(self._defaults)
|
||||
chain._regex_defaults = dict(self._regex_defaults)
|
||||
chain._functional_defaults = dict(self._functional_defaults)
|
||||
chain._string_defaults = dict(self._string_defaults)
|
||||
return chain
|
||||
|
||||
def regex(self, *pattern, **kwargs):
|
||||
"""
|
||||
Add re pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(self._kwargs, kwargs)
|
||||
set_defaults(self._regex_defaults, kwargs)
|
||||
set_defaults(self._defaults, kwargs)
|
||||
pattern = self.rebulk.build_re(*pattern, **kwargs)
|
||||
part = ChainPart(self, pattern)
|
||||
self.parts.append(part)
|
||||
return part
|
||||
|
||||
def functional(self, *pattern, **kwargs):
|
||||
"""
|
||||
Add functional pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(self._kwargs, kwargs)
|
||||
set_defaults(self._functional_defaults, kwargs)
|
||||
set_defaults(self._defaults, kwargs)
|
||||
pattern = self.rebulk.build_functional(*pattern, **kwargs)
|
||||
part = ChainPart(self, pattern)
|
||||
self.parts.append(part)
|
||||
return part
|
||||
|
||||
def string(self, *pattern, **kwargs):
|
||||
"""
|
||||
Add string pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(self._kwargs, kwargs)
|
||||
set_defaults(self._functional_defaults, kwargs)
|
||||
set_defaults(self._defaults, kwargs)
|
||||
pattern = self.rebulk.build_string(*pattern, **kwargs)
|
||||
part = ChainPart(self, pattern)
|
||||
if not pattern:
|
||||
raise ValueError("One pattern should be given to the chain")
|
||||
if len(pattern) > 1:
|
||||
raise ValueError("Only one pattern can be given to the chain")
|
||||
part = ChainPart(self, pattern[0])
|
||||
self.parts.append(part)
|
||||
return part
|
||||
|
||||
def close(self):
|
||||
"""
|
||||
Close chain builder to continue registering other pattern
|
||||
|
||||
:return:
|
||||
:rtype:
|
||||
Deeply close the chain
|
||||
:return: Rebulk instance
|
||||
"""
|
||||
return self.rebulk
|
||||
parent = self.parent
|
||||
while isinstance(parent, Chain):
|
||||
parent = parent.parent
|
||||
return parent
|
||||
|
||||
def _match(self, pattern, input_string, context=None):
|
||||
# pylint: disable=too-many-locals,too-many-nested-blocks
|
||||
|
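Two behavioural notes on the reworked Chain before the matching internals below: pattern() now enforces exactly one pattern per chain part (raising ValueError otherwise), and close() walks parent links until it leaves Chain instances, so closing a nested chain returns the root Rebulk rather than the intermediate chain. A hedged usage sketch (the regexes are illustrative):

    from rebulk import Rebulk

    rebulk = Rebulk()
    chain = rebulk.chain()
    chain.regex(r'S(?P<season>\d{2})')
    chain.regex(r'E(?P<episode>\d{2})').repeater('+')
    assert chain.close() is rebulk  # root Rebulk, even from a nested chain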
@ -173,42 +70,20 @@ class Chain(Pattern):
|
|||
chain_found = False
|
||||
current_chain_matches = []
|
||||
valid_chain = True
|
||||
is_chain_start = True
|
||||
for chain_part in self.parts:
|
||||
try:
|
||||
chain_part_matches, raw_chain_part_matches = Chain._match_chain_part(is_chain_start, chain_part,
|
||||
chain_input_string,
|
||||
context)
|
||||
|
||||
Chain._fix_matches_offset(chain_part_matches, input_string, offset)
|
||||
Chain._fix_matches_offset(raw_chain_part_matches, input_string, offset)
|
||||
|
||||
if raw_chain_part_matches:
|
||||
grouped_matches_dict = dict()
|
||||
for match_index, match in itertools.groupby(chain_part_matches,
|
||||
lambda m: m.match_index):
|
||||
grouped_matches_dict[match_index] = list(match)
|
||||
|
||||
grouped_raw_matches_dict = dict()
|
||||
for match_index, raw_match in itertools.groupby(raw_chain_part_matches,
|
||||
lambda m: m.match_index):
|
||||
grouped_raw_matches_dict[match_index] = list(raw_match)
|
||||
|
||||
for match_index, grouped_raw_matches in grouped_raw_matches_dict.items():
|
||||
chain_found = True
|
||||
offset = grouped_raw_matches[-1].raw_end
|
||||
chain_input_string = input_string[offset:]
|
||||
if not chain_part.is_hidden:
|
||||
grouped_matches = grouped_matches_dict.get(match_index, [])
|
||||
if self._chain_breaker_eval(current_chain_matches + grouped_matches):
|
||||
current_chain_matches.extend(grouped_matches)
|
||||
chain_part_matches, raw_chain_part_matches = chain_part.matches(chain_input_string,
|
||||
context,
|
||||
with_raw_matches=True)
|
||||
|
||||
chain_found, chain_input_string, offset = \
|
||||
self._to_next_chain_part(chain_part, chain_part_matches, raw_chain_part_matches, chain_found,
|
||||
input_string, chain_input_string, offset, current_chain_matches)
|
||||
except _InvalidChainException:
|
||||
valid_chain = False
|
||||
if current_chain_matches:
|
||||
offset = current_chain_matches[0].raw_end
|
||||
break
|
||||
is_chain_start = False
|
||||
if not chain_found:
|
||||
break
|
||||
if current_chain_matches and valid_chain:
|
||||
|
@@ -217,38 +92,66 @@ class Chain(Pattern):

        return chain_matches

    def _match_parent(self, match, yield_parent):
    def _to_next_chain_part(self, chain_part, chain_part_matches, raw_chain_part_matches, chain_found,
                            input_string, chain_input_string, offset, current_chain_matches):
        Chain._fix_matches_offset(chain_part_matches, input_string, offset)
        Chain._fix_matches_offset(raw_chain_part_matches, input_string, offset)

        if raw_chain_part_matches:
            grouped_matches_dict = self._group_by_match_index(chain_part_matches)
            grouped_raw_matches_dict = self._group_by_match_index(raw_chain_part_matches)

            for match_index, grouped_raw_matches in grouped_raw_matches_dict.items():
                chain_found = True
                offset = grouped_raw_matches[-1].raw_end
                chain_input_string = input_string[offset:]

                if not chain_part.is_hidden:
                    grouped_matches = grouped_matches_dict.get(match_index, [])
                    if self._chain_breaker_eval(current_chain_matches + grouped_matches):
                        current_chain_matches.extend(grouped_matches)
        return chain_found, chain_input_string, offset

    def _process_match(self, match, match_index, child=False):
        """
        Handle a parent match
        Handle a match
        :param match:
        :type match:
        :param yield_parent:
        :type yield_parent:
        :param match_index:
        :type match_index:
        :param child:
        :type child:
        :return:
        :rtype:
        """
        ret = super(Chain, self)._match_parent(match, yield_parent)
        original_children = Matches(match.children)
        original_end = match.end
        while not ret and match.children:
            last_pattern = match.children[-1].pattern
            last_pattern_children = [child for child in match.children if child.pattern == last_pattern]
            last_pattern_groups_iter = itertools.groupby(last_pattern_children, lambda child: child.match_index)
            last_pattern_groups = {}
            for index, matches in last_pattern_groups_iter:
                last_pattern_groups[index] = list(matches)
        # pylint: disable=too-many-locals
        ret = super(Chain, self)._process_match(match, match_index, child=child)
        if ret:
            return True

            for index in reversed(list(last_pattern_groups)):
                last_matches = list(last_pattern_groups[index])
                for last_match in last_matches:
                    match.children.remove(last_match)
                match.end = match.children[-1].end if match.children else match.start
                ret = super(Chain, self)._match_parent(match, yield_parent)
                if ret:
                    return True
        match.children = original_children
        match.end = original_end
        return ret
        if match.children:
            last_pattern = match.children[-1].pattern
            last_pattern_groups = self._group_by_match_index(
                [child_ for child_ in match.children if child_.pattern == last_pattern]
            )

            if last_pattern_groups:
                original_children = Matches(match.children)
                original_end = match.end

                for index in reversed(list(last_pattern_groups)):
                    last_matches = last_pattern_groups[index]
                    for last_match in last_matches:
                        match.children.remove(last_match)
                    match.end = match.children[-1].end if match.children else match.start
                    ret = super(Chain, self)._process_match(match, match_index, child=child)
                    if ret:
                        return True

                match.children = original_children
                match.end = original_end

        return False

    def _build_chain_match(self, current_chain_matches, input_string):
        start = None
@@ -282,46 +185,11 @@ class Chain(Pattern):
            Chain._fix_matches_offset(chain_part_match.children, input_string, offset)

    @staticmethod
    def _match_chain_part(is_chain_start, chain_part, chain_input_string, context):
        chain_part_matches, raw_chain_part_matches = chain_part.pattern.matches(chain_input_string, context,
                                                                                with_raw_matches=True)
        chain_part_matches = Chain._truncate_chain_part_matches(is_chain_start, chain_part_matches, chain_part,
                                                                chain_input_string)
        raw_chain_part_matches = Chain._truncate_chain_part_matches(is_chain_start, raw_chain_part_matches, chain_part,
                                                                    chain_input_string)

        Chain._validate_chain_part_matches(raw_chain_part_matches, chain_part)
        return chain_part_matches, raw_chain_part_matches

    @staticmethod
    def _truncate_chain_part_matches(is_chain_start, chain_part_matches, chain_part, chain_input_string):
        if not chain_part_matches:
            return chain_part_matches

        if not is_chain_start:
            separator = chain_input_string[0:chain_part_matches[0].initiator.raw_start]
            if separator:
                return []

        j = 1
        for i in range(0, len(chain_part_matches) - 1):
            separator = chain_input_string[chain_part_matches[i].initiator.raw_end:
                                           chain_part_matches[i + 1].initiator.raw_start]
            if separator:
                break
            j += 1
        truncated = chain_part_matches[:j]
        if chain_part.repeater_end is not None:
            truncated = [m for m in truncated if m.match_index < chain_part.repeater_end]
        return truncated

    @staticmethod
    def _validate_chain_part_matches(chain_part_matches, chain_part):
        max_match_index = -1
        if chain_part_matches:
            max_match_index = max([m.match_index for m in chain_part_matches])
        if max_match_index + 1 < chain_part.repeater_start:
            raise _InvalidChainException
    def _group_by_match_index(matches):
        grouped_matches_dict = dict()
        for match_index, match in itertools.groupby(matches, lambda m: m.match_index):
            grouped_matches_dict[match_index] = list(match)
        return grouped_matches_dict

    @property
    def match_options(self):
@@ -338,7 +206,7 @@ class Chain(Pattern):
        return "<%s%s:%s>" % (self.__class__.__name__, defined, self.parts)


class ChainPart(object):
class ChainPart(BasePattern):
    """
    Part of a pattern chain.
    """
@@ -350,6 +218,51 @@ class ChainPart(object):
        self.repeater_end = 1
        self._hidden = False

    @property
    def _is_chain_start(self):
        return self._chain.parts[0] == self

    def matches(self, input_string, context=None, with_raw_matches=False):
        matches, raw_matches = self.pattern.matches(input_string, context=context, with_raw_matches=True)

        matches = self._truncate_repeater(matches, input_string)
        raw_matches = self._truncate_repeater(raw_matches, input_string)

        self._validate_repeater(raw_matches)

        if with_raw_matches:
            return matches, raw_matches

        return matches

    def _truncate_repeater(self, matches, input_string):
        if not matches:
            return matches

        if not self._is_chain_start:
            separator = input_string[0:matches[0].initiator.raw_start]
            if separator:
                return []

        j = 1
        for i in range(0, len(matches) - 1):
            separator = input_string[matches[i].initiator.raw_end:
                                     matches[i + 1].initiator.raw_start]
            if separator:
                break
            j += 1
        truncated = matches[:j]
        if self.repeater_end is not None:
            truncated = [m for m in truncated if m.match_index < self.repeater_end]
        return truncated

    def _validate_repeater(self, matches):
        max_match_index = -1
        if matches:
            max_match_index = max([m.match_index for m in matches])
        if max_match_index + 1 < self.repeater_start:
            raise _InvalidChainException

    def chain(self):
        """
        Add patterns chain, using configuration from this chain
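The `ChainPart` logic above truncates a run of consecutive matches as soon as a separator appears between them, then checks the surviving run against the declared repeater bounds. A rough standalone sketch of that rule (hypothetical helper, spans given as `(start, end)` pairs):

# Hypothetical sketch of the _truncate_repeater semantics above.
def truncate_run(spans, repeater_end=None):
    # Keep only the leading run of adjacent spans (no gap between them).
    run = spans[:1]
    for prev, cur in zip(spans, spans[1:]):
        if prev[1] != cur[0]:  # a separator (gap) breaks the run
            break
        run.append(cur)
    if repeater_end is not None:  # enforce the repeater upper bound
        run = run[:repeater_end]
    return run

assert truncate_run([(0, 2), (2, 4), (5, 7)]) == [(0, 2), (2, 4)]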
@@ -15,9 +15,19 @@ def formatters(*chained_formatters):
    :return:
    :rtype:
    """

    def formatters_chain(input_string):  # pylint:disable=missing-docstring
        for chained_formatter in chained_formatters:
            input_string = chained_formatter(input_string)
        return input_string

    return formatters_chain


def default_formatter(input_string):
    """
    Default formatter
    :param input_string:
    :return:
    """
    return input_string
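`formatters()` simply composes the given callables left to right, and `default_formatter` is the new named identity used when no formatter is configured. A quick usage sketch, assuming this vendored module layout:

from rebulk.formatters import formatters, default_formatter

strip_then_int = formatters(str.strip, int)  # applied in declaration order
assert strip_then_int(' 42 ') == 42
assert default_formatter('unchanged') == 'unchanged'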
@@ -3,7 +3,7 @@
"""
Introspect rebulk object to retrieve capabilities.
"""
from abc import ABCMeta, abstractproperty
from abc import ABCMeta, abstractmethod
from collections import defaultdict

import six
@@ -16,7 +16,8 @@ class Description(object):
    """
    Abstract class for a description.
    """
    @abstractproperty
    @property
    @abstractmethod
    def properties(self):  # pragma: no cover
        """
        Properties of described object.
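Stacking `@property` over `@abstractmethod` is the replacement idiom for the deprecated `abstractproperty`: subclasses must still override the member, but as a property. A minimal illustration with hypothetical class names:

from abc import ABCMeta, abstractmethod

import six


@six.add_metaclass(ABCMeta)
class Base(object):
    @property
    @abstractmethod
    def properties(self):
        """Abstract property; concrete subclasses must provide it."""


class Impl(Base):
    @property
    def properties(self):
        return {'name': ['value']}


assert Impl().properties == {'name': ['value']}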
@@ -4,12 +4,12 @@
Various utility functions
"""


import sys
import inspect

from inspect import isclass
try:
    from inspect import getfullargspec as getargspec

    _fullargspec_supported = True
except ImportError:
    _fullargspec_supported = False
@@ -55,8 +55,8 @@ def call(function, *args, **kwargs):
    :return: same value as default function call
    :rtype: object
    """
    func = constructor_args if inspect.isclass(function) else function_args
    call_args, call_kwargs = func(function, *args, **kwargs)
    func = constructor_args if isclass(function) else function_args
    call_args, call_kwargs = func(function, *args, ignore_unused=True, **kwargs)  # @see #20
    return function(*call_args, **call_kwargs)
@@ -145,6 +145,8 @@ if not _fullargspec_supported:
        else:
            call_args = args[:len(argspec.args) - (1 if constructor else 0)]
        return call_args, call_kwarg

    argspec_args = argspec_args_legacy

@@ -215,9 +217,12 @@ def filter_index(collection, predicate=None, index=None):
    return collection


def set_defaults(defaults, kwargs):
def set_defaults(defaults, kwargs, override=False):
    """
    Set defaults from defaults dict to kwargs dict

    :param override:
    :type override:
    :param defaults:
    :type defaults:
    :param kwargs:
@@ -225,12 +230,13 @@ def set_defaults(defaults, kwargs):
    :return:
    :rtype:
    """
    if 'clear' in defaults.keys() and defaults.pop('clear'):
        kwargs.clear()
    for key, value in defaults.items():
        if key not in kwargs and value is not None:
        if key in kwargs:
            if isinstance(value, list) and isinstance(kwargs[key], list):
                kwargs[key] = list(value) + kwargs[key]
            elif isinstance(value, dict) and isinstance(kwargs[key], dict):
                set_defaults(value, kwargs[key])
        if key not in kwargs or override:
            kwargs[key] = value
        elif isinstance(value, list) and isinstance(kwargs[key], list):
            kwargs[key] = list(value) + kwargs[key]
        elif isinstance(value, dict) and isinstance(kwargs[key], dict):
            set_defaults(value, kwargs[key])
        elif key in kwargs and value is None:
            kwargs[key] = None
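In the new `set_defaults`, list and dict defaults now merge even when the key is already present (defaults are prepended or recursed into), plain missing keys are filled, and `override=True` forces replacement. A sketch of the observable behaviour under these semantics:

from rebulk.loose import set_defaults

kwargs = {'tags': ['user'], 'formatter': {'episode': int}}
set_defaults({'tags': ['default'], 'formatter': {'version': int}, 'children': True}, kwargs)

# Default list entries are prepended, dicts are merged recursively,
# and missing scalar keys are filled in.
assert kwargs['tags'] == ['default', 'user']
assert set(kwargs['formatter']) == {'episode', 'version'}
assert kwargs['children'] is True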
@@ -815,6 +815,24 @@ class Match(object):

        return filter_index(ret, predicate, index)

    def tagged(self, *tags):
        """
        Check if this match has at least one of the provided tags

        :param tags:
        :return: True if at least one tag is defined, False otherwise.
        """
        return any(tag in self.tags for tag in tags)

    def named(self, *names):
        """
        Check if one of the child matches has one of the provided names

        :param names:
        :return: True if at least one child with a given name is defined, False otherwise.
        """
        return any(name in self.names for name in names)

    def __len__(self):
        return self.end - self.start
|
|||
import six
|
||||
|
||||
from . import debug
|
||||
from .formatters import default_formatter
|
||||
from .loose import call, ensure_list, ensure_dict
|
||||
from .match import Match
|
||||
from .remodule import re, REGEX_AVAILABLE
|
||||
from .utils import find_all, is_iterable, get_first_defined
|
||||
from .validators import allways_true
|
||||
|
||||
|
||||
@six.add_metaclass(ABCMeta)
|
||||
class Pattern(object):
|
||||
class BasePattern(object):
|
||||
"""
|
||||
Base class for Pattern like objects
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def matches(self, input_string, context=None, with_raw_matches=False):
|
||||
"""
|
||||
Computes all matches for a given input
|
||||
|
||||
:param input_string: the string to parse
|
||||
:type input_string: str
|
||||
:param context: the context
|
||||
:type context: dict
|
||||
:param with_raw_matches: should return details
|
||||
:type with_raw_matches: dict
|
||||
:return: matches based on input_string for this pattern
|
||||
:rtype: iterator[Match]
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
@six.add_metaclass(ABCMeta)
|
||||
class Pattern(BasePattern):
|
||||
"""
|
||||
Definition of a particular pattern to search for.
|
||||
"""
|
||||
|
@@ -25,7 +50,7 @@ class Pattern(object):
    def __init__(self, name=None, tags=None, formatter=None, value=None, validator=None, children=False, every=False,
                 private_parent=False, private_children=False, private=False, private_names=None, ignore_names=None,
                 marker=False, format_all=False, validate_all=False, disabled=lambda context: False, log_level=None,
                 properties=None, post_processor=None, **kwargs):
                 properties=None, post_processor=None, pre_match_processor=None, post_match_processor=None, **kwargs):
        """
        :param name: Name of this pattern
        :type name: str
@@ -66,15 +91,19 @@ class Pattern(object):
        :type disabled: bool|function
        :param log_lvl: Log level associated to this pattern
        :type log_lvl: int
        :param post_process: Post processing function
        :param post_processor: Post processing function
        :type post_processor: func
        :param pre_match_processor: Pre match processing function
        :type pre_match_processor: func
        :param post_match_processor: Post match processing function
        :type post_match_processor: func
        """
        # pylint:disable=too-many-locals,unused-argument
        self.name = name
        self.tags = ensure_list(tags)
        self.formatters, self._default_formatter = ensure_dict(formatter, lambda x: x)
        self.formatters, self._default_formatter = ensure_dict(formatter, default_formatter)
        self.values, self._default_value = ensure_dict(value, None)
        self.validators, self._default_validator = ensure_dict(validator, lambda match: True)
        self.validators, self._default_validator = ensure_dict(validator, allways_true)
        self.every = every
        self.children = children
        self.private = private
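The two new hooks run once per raw match, before and after the pattern's configuration is applied; per `_process_match_processor` further down, a processor may return a replacement match, while returning nothing keeps the (possibly mutated) original. A hypothetical use:

from rebulk import Rebulk

def tag_numbers(match):
    # Hypothetical post_match_processor: annotate digit-only matches in
    # place; returning None keeps the mutated original match.
    if match.value.isdigit():
        match.tags.append('number')

bulk = Rebulk().regex(r'\w+', post_match_processor=tag_numbers)
assert bulk.matches('episode 42')[1].tagged('number')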
@@ -96,6 +125,14 @@ class Pattern(object):
            self.post_processor = None
        else:
            self.post_processor = post_processor
        if not callable(pre_match_processor):
            self.pre_match_processor = None
        else:
            self.pre_match_processor = pre_match_processor
        if not callable(post_match_processor):
            self.post_match_processor = None
        else:
            self.post_match_processor = post_match_processor

    @property
    def log_level(self):
@@ -106,83 +143,6 @@ class Pattern(object):
        """
        return self._log_level if self._log_level is not None else debug.LOG_LEVEL

    def _yield_children(self, match):
        """
        Does this match have children
        :param match:
        :type match:
        :return:
        :rtype:
        """
        return match.children and (self.children or self.every)

    def _yield_parent(self):
        """
        Should the parent match be yielded
        :param match:
        :type match:
        :return:
        :rtype:
        """
        return not self.children or self.every

    def _match_parent(self, match, yield_parent):
        """
        Handle a parent match
        :param match:
        :type match:
        :param yield_parent:
        :type yield_parent:
        :return:
        :rtype:
        """
        if not match or match.value == "":
            return False

        pattern_value = get_first_defined(self.values, [match.name, '__parent__', None],
                                          self._default_value)
        if pattern_value:
            match.value = pattern_value

        if yield_parent or self.format_all:
            match.formatter = get_first_defined(self.formatters, [match.name, '__parent__', None],
                                                self._default_formatter)
        if yield_parent or self.validate_all:
            validator = get_first_defined(self.validators, [match.name, '__parent__', None],
                                          self._default_validator)
            if validator and not validator(match):
                return False
        return True

    def _match_child(self, child, yield_children):
        """
        Handle a child match
        :param child:
        :type child:
        :param yield_children:
        :type yield_children:
        :return:
        :rtype:
        """
        if not child or child.value == "":
            return False

        pattern_value = get_first_defined(self.values, [child.name, '__children__', None],
                                          self._default_value)
        if pattern_value:
            child.value = pattern_value

        if yield_children or self.format_all:
            child.formatter = get_first_defined(self.formatters, [child.name, '__children__', None],
                                                self._default_formatter)

        if yield_children or self.validate_all:
            validator = get_first_defined(self.validators, [child.name, '__children__', None],
                                          self._default_validator)
            if validator and not validator(child):
                return False
        return True

    def matches(self, input_string, context=None, with_raw_matches=False):
        """
        Computes all matches for a given input
@@ -200,41 +160,168 @@ class Pattern(object):

        matches = []
        raw_matches = []

        for pattern in self.patterns:
            yield_parent = self._yield_parent()
            match_index = -1
            match_index = 0
            for match in self._match(pattern, input_string, context):
                match_index += 1
                match.match_index = match_index
                raw_matches.append(match)
                yield_children = self._yield_children(match)
                if not self._match_parent(match, yield_parent):
                    continue
                validated = True
                for child in match.children:
                    if not self._match_child(child, yield_children):
                        validated = False
                        break
                if validated:
                    if self.private_parent:
                        match.private = True
                    if self.private_children:
                        for child in match.children:
                            child.private = True
                    if yield_parent or self.private_parent:
                        matches.append(match)
                    if yield_children or self.private_children:
                        for child in match.children:
                            child.match_index = match_index
                            matches.append(child)
        matches = self._matches_post_process(matches)
        self._matches_privatize(matches)
        self._matches_ignore(matches)
                matches.extend(self._process_matches(match, match_index))
                match_index += 1

        matches = self._post_process_matches(matches)

        if with_raw_matches:
            return matches, raw_matches
        return matches

    def _matches_post_process(self, matches):
    @property
    def _should_include_children(self):
        """
        Check if child matches from this pattern should be included in matches results.
        :param match:
        :type match:
        :return:
        :rtype:
        """
        return self.children or self.every

    @property
    def _should_include_parent(self):
        """
        Check if a match from this pattern should be included in matches results.
        :param match:
        :type match:
        :return:
        :rtype:
        """
        return not self.children or self.every

    @staticmethod
    def _match_config_property_keys(match, child=False):
        if match.name:
            yield match.name
        if child:
            yield '__children__'
        else:
            yield '__parent__'
        yield None

    @staticmethod
    def _process_match_index(match, match_index):
        """
        Process match index from this pattern process state.

        :param match:
        :return:
        """
        match.match_index = match_index

    def _process_match_private(self, match, child=False):
        """
        Process match privacy from this pattern configuration.

        :param match:
        :param child:
        :return:
        """

        if match.name and match.name in self.private_names or \
                not child and self.private_parent or \
                child and self.private_children:
            match.private = True

    def _process_match_value(self, match, child=False):
        """
        Process match value from this pattern configuration.
        :param match:
        :return:
        """
        keys = self._match_config_property_keys(match, child=child)
        pattern_value = get_first_defined(self.values, keys, self._default_value)
        if pattern_value:
            match.value = pattern_value

    def _process_match_formatter(self, match, child=False):
        """
        Process match formatter from this pattern configuration.

        :param match:
        :return:
        """
        included = self._should_include_children if child else self._should_include_parent
        if included or self.format_all:
            keys = self._match_config_property_keys(match, child=child)
            match.formatter = get_first_defined(self.formatters, keys, self._default_formatter)

    def _process_match_validator(self, match, child=False):
        """
        Process match validation from this pattern configuration.

        :param match:
        :return: True if match is validated by the configured validator, False otherwise.
        """
        included = self._should_include_children if child else self._should_include_parent
        if included or self.validate_all:
            keys = self._match_config_property_keys(match, child=child)
            validator = get_first_defined(self.validators, keys, self._default_validator)
            if validator and not validator(match):
                return False
        return True

    def _process_match(self, match, match_index, child=False):
        """
        Process match from this pattern by setting all properties from defined configuration
        (index, private, value, formatter, validator, ...).

        :param match:
        :type match:
        :return: True if match is validated by the configured validator, False otherwise.
        :rtype:
        """
        self._process_match_index(match, match_index)
        self._process_match_private(match, child)
        self._process_match_value(match, child)
        self._process_match_formatter(match, child)
        return self._process_match_validator(match, child)

    @staticmethod
    def _process_match_processor(match, processor):
        if processor:
            ret = processor(match)
            if ret is not None:
                return ret
        return match

    def _process_matches(self, match, match_index):
        """
        Process and generate all matches for the given unprocessed match.
        :param match:
        :param match_index:
        :return: Processed and dispatched matches.
        """
        match = self._process_match_processor(match, self.pre_match_processor)
        if not match:
            return

        if not self._process_match(match, match_index):
            return

        for child in match.children:
            if not self._process_match(child, match_index, child=True):
                return

        match = self._process_match_processor(match, self.post_match_processor)
        if not match:
            return

        if (self._should_include_parent or self.private_parent) and match.name not in self.ignore_names:
            yield match
        if self._should_include_children or self.private_children:
            children = [x for x in match.children if x.name not in self.ignore_names]
            for child in children:
                yield child

    def _post_process_matches(self, matches):
        """
        Post process matches with user defined function
        :param matches:
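The refactored pipeline above processes each raw match through the pre-processor, the configuration steps (index, privacy, value, formatter, validator), its children, and the post-processor before dispatching parent and child matches filtered by `ignore_names`. A short sketch of the observable effect:

from rebulk import Rebulk

# children=True yields the named child matches instead of the parent;
# ignore_names then filters children out of the results by name.
bulk = Rebulk().regex(r'(?P<season>\d+)x(?P<episode>\d+)',
                      children=True, ignore_names=['season'])
assert [m.name for m in bulk.matches('3x07')] == ['episode']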
@@ -246,32 +333,6 @@ class Pattern(object):
            return self.post_processor(matches, self)
        return matches

    def _matches_privatize(self, matches):
        """
        Mark matches included in private_names with private flag.
        :param matches:
        :type matches:
        :return:
        :rtype:
        """
        if self.private_names:
            for match in matches:
                if match.name in self.private_names:
                    match.private = True

    def _matches_ignore(self, matches):
        """
        Ignore matches included in ignore_names.
        :param matches:
        :type matches:
        :return:
        :rtype:
        """
        if self.ignore_names:
            for match in list(matches):
                if match.name in self.ignore_names:
                    matches.remove(match)

    @abstractproperty
    def patterns(self):  # pragma: no cover
        """
@@ -306,7 +367,7 @@ class Pattern(object):
    @abstractmethod
    def _match(self, pattern, input_string, context=None):  # pragma: no cover
        """
        Computes all matches for a given pattern and input
        Computes all unprocessed matches for a given pattern and input.

        :param pattern: the pattern to use
        :param input_string: the string to parse
@@ -350,7 +411,9 @@ class StringPattern(Pattern):

    def _match(self, pattern, input_string, context=None):
        for index in find_all(input_string, pattern, **self._kwargs):
            yield Match(index, index + len(pattern), pattern=self, input_string=input_string, **self._match_kwargs)
            match = Match(index, index + len(pattern), pattern=self, input_string=input_string, **self._match_kwargs)
            if match:
                yield match


class RePattern(Pattern):
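The new `if match:` guards rely on `Match.__len__` (added earlier in this commit): a zero-length match is falsy, so empty matches are no longer yielded by any pattern type. A sketch of the idea:

from rebulk.match import Match

empty = Match(5, 5, input_string='abcdefgh')
assert len(empty) == 0 and not empty    # zero-length: falsy, now skipped

real = Match(0, 3, input_string='abcdefgh')
assert real and len(real) == 3          # non-empty matches still yielded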
@@ -411,15 +474,18 @@ class RePattern(Pattern):
                    for start, end in match_object.spans(i):
                        child_match = Match(start, end, name=name, parent=main_match, pattern=self,
                                            input_string=input_string, **self._children_match_kwargs)
                        main_match.children.append(child_match)
                        if child_match:
                            main_match.children.append(child_match)
                else:
                    start, end = match_object.span(i)
                    if start > -1 and end > -1:
                        child_match = Match(start, end, name=name, parent=main_match, pattern=self,
                                            input_string=input_string, **self._children_match_kwargs)
                        main_match.children.append(child_match)
                        if child_match:
                            main_match.children.append(child_match)

            yield main_match
            if main_match:
                yield main_match


class FunctionalPattern(Pattern):
@@ -457,14 +523,18 @@ class FunctionalPattern(Pattern):
                if self._match_kwargs:
                    options = self._match_kwargs.copy()
                    options.update(args)
                    yield Match(pattern=self, input_string=input_string, **options)
                    match = Match(pattern=self, input_string=input_string, **options)
                    if match:
                        yield match
                else:
                    kwargs = self._match_kwargs
                    if isinstance(args[-1], dict):
                        kwargs = dict(kwargs)
                        kwargs.update(args[-1])
                        args = args[:-1]
                    yield Match(*args, pattern=self, input_string=input_string, **kwargs)
                    match = Match(*args, pattern=self, input_string=input_string, **kwargs)
                    if match:
                        yield match


def filter_match_kwargs(kwargs, children=False):
@@ -5,20 +5,16 @@ Entry point functions and classes for Rebulk
"""
from logging import getLogger

from .builder import Builder
from .match import Matches

from .pattern import RePattern, StringPattern, FunctionalPattern
from .chain import Chain

from .processors import ConflictSolver, PrivateRemover
from .loose import set_defaults
from .utils import extend_safe
from .rules import Rules
from .utils import extend_safe

log = getLogger(__name__).log


class Rebulk(object):
class Rebulk(Builder):
    r"""
    Regular expression, string and function based patterns are declared in a ``Rebulk`` object. It uses a fluent API to
    chain ``string``, ``regex``, and ``functional`` methods to define various pattern types.
@@ -44,6 +40,7 @@ class Rebulk(object):
    >>> bulk.matches("the lakers are from la")
    [<lakers:(4, 10)>, <la:(20, 22)>]
    """

    # pylint:disable=protected-access

    def __init__(self, disabled=lambda context: False, default_rules=True):
@@ -56,6 +53,7 @@ class Rebulk(object):
        :return:
        :rtype:
        """
        super(Rebulk, self).__init__()
        if not callable(disabled):
            self.disabled = lambda context: disabled
        else:
@@ -64,11 +62,6 @@ class Rebulk(object):
        self._rules = Rules()
        if default_rules:
            self.rules(ConflictSolver, PrivateRemover)
        self._defaults = {}
        self._regex_defaults = {}
        self._string_defaults = {}
        self._functional_defaults = {}
        self._chain_defaults = {}
        self._rebulks = []

    def pattern(self, *pattern):
|
@ -83,172 +76,6 @@ class Rebulk(object):
|
|||
self._patterns.extend(pattern)
|
||||
return self
|
||||
|
||||
def defaults(self, **kwargs):
|
||||
"""
|
||||
Define default keyword arguments for all patterns
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
self._defaults = kwargs
|
||||
return self
|
||||
|
||||
def regex_defaults(self, **kwargs):
|
||||
"""
|
||||
Define default keyword arguments for functional patterns.
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
self._regex_defaults = kwargs
|
||||
return self
|
||||
|
||||
def regex(self, *pattern, **kwargs):
|
||||
"""
|
||||
Add re pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:return: self
|
||||
:rtype: Rebulk
|
||||
"""
|
||||
self.pattern(self.build_re(*pattern, **kwargs))
|
||||
return self
|
||||
|
||||
def build_re(self, *pattern, **kwargs):
|
||||
"""
|
||||
Builds a new regular expression pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(self._regex_defaults, kwargs)
|
||||
set_defaults(self._defaults, kwargs)
|
||||
return RePattern(*pattern, **kwargs)
|
||||
|
||||
def string_defaults(self, **kwargs):
|
||||
"""
|
||||
Define default keyword arguments for string patterns.
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
self._string_defaults = kwargs
|
||||
return self
|
||||
|
||||
def string(self, *pattern, **kwargs):
|
||||
"""
|
||||
Add string pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:return: self
|
||||
:rtype: Rebulk
|
||||
"""
|
||||
self.pattern(self.build_string(*pattern, **kwargs))
|
||||
return self
|
||||
|
||||
def build_string(self, *pattern, **kwargs):
|
||||
"""
|
||||
Builds a new string pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(self._string_defaults, kwargs)
|
||||
set_defaults(self._defaults, kwargs)
|
||||
return StringPattern(*pattern, **kwargs)
|
||||
|
||||
def functional_defaults(self, **kwargs):
|
||||
"""
|
||||
Define default keyword arguments for functional patterns.
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
self._functional_defaults = kwargs
|
||||
return self
|
||||
|
||||
def functional(self, *pattern, **kwargs):
|
||||
"""
|
||||
Add functional pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:return: self
|
||||
:rtype: Rebulk
|
||||
"""
|
||||
self.pattern(self.build_functional(*pattern, **kwargs))
|
||||
return self
|
||||
|
||||
def build_functional(self, *pattern, **kwargs):
|
||||
"""
|
||||
Builds a new functional pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(self._functional_defaults, kwargs)
|
||||
set_defaults(self._defaults, kwargs)
|
||||
return FunctionalPattern(*pattern, **kwargs)
|
||||
|
||||
def chain_defaults(self, **kwargs):
|
||||
"""
|
||||
Define default keyword arguments for patterns chain.
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
self._chain_defaults = kwargs
|
||||
return self
|
||||
|
||||
def chain(self, **kwargs):
|
||||
"""
|
||||
Add patterns chain, using configuration of this rebulk
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
chain = self.build_chain(**kwargs)
|
||||
self._patterns.append(chain)
|
||||
return chain
|
||||
|
||||
def build_chain(self, **kwargs):
|
||||
"""
|
||||
Builds a new patterns chain
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(self._chain_defaults, kwargs)
|
||||
set_defaults(self._defaults, kwargs)
|
||||
return Chain(self, **kwargs)
|
||||
|
||||
def rules(self, *rules):
|
||||
"""
|
||||
Add rules as a module, class or instance.
|
||||
|
|
|
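With the construction methods above factored out to `Builder`, `Rebulk` keeps pattern storage, rules and execution; from a caller's point of view the fluent API is unchanged. For example, mirroring the class docstring:

from rebulk import Rebulk

bulk = (Rebulk()
        .string('lakers', name='team')
        .regex(r'\bla\b', name='city'))
assert [m.name for m in bulk.matches('the lakers are from la')] == ['team', 'city']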
@@ -2,11 +2,11 @@
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, no-member, len-as-condition
import re

from functools import partial

from rebulk.pattern import FunctionalPattern, StringPattern, RePattern
from ..rebulk import Rebulk
from ..validators import chars_surround
from ..rebulk import Rebulk, FunctionalPattern, RePattern, StringPattern


def test_chain_close():
@@ -63,18 +63,61 @@ def test_build_chain():

def test_chain_defaults():
    rebulk = Rebulk()
    rebulk.defaults(validator=lambda x: True, ignore_names=['testIgnore'], children=True)
    rebulk.defaults(validator=lambda x: x.value.startswith('t'), ignore_names=['testIgnore'], children=True)

    rebulk.chain()\
    rebulk.chain() \
        .regex("(?P<test>test)") \
        .regex(" ").repeater("*") \
        .regex("(?P<best>best)") \
        .regex(" ").repeater("*") \
        .regex("(?P<testIgnore>testIgnore)")
    matches = rebulk.matches("test testIgnore")
    matches = rebulk.matches("test best testIgnore")

    assert len(matches) == 1
    assert matches[0].name == "test"


def test_chain_with_validators():
    def chain_validator(match):
        return match.value.startswith('t') and match.value.endswith('t')

    def default_validator(match):
        return match.value.startswith('t') and match.value.endswith('g')

    def custom_validator(match):
        return match.value.startswith('b') and match.value.endswith('t')

    rebulk = Rebulk()
    rebulk.defaults(children=True, validator=default_validator)

    rebulk.chain(validate_all=True, validator={'__parent__': chain_validator}) \
        .regex("(?P<test>testing)", validator=default_validator).repeater("+") \
        .regex(" ").repeater("+") \
        .regex("(?P<best>best)", validator=custom_validator).repeater("+")
    matches = rebulk.matches("some testing best end")

    assert len(matches) == 2
    assert matches[0].name == "test"
    assert matches[1].name == "best"


def test_matches_docs():
    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE) \
        .defaults(children=True, formatter={'episode': int, 'version': int}) \
        .chain() \
        .regex(r'e(?P<episode>\d{1,4})').repeater(1) \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'[ex-](?P<episode>\d{1,4})').repeater('*') \
        .close()  # .repeater(1) could be omitted as it's the default behavior

    result = rebulk.matches("This is E14v2-15-16-17").to_dict()  # converts matches to dict

    assert 'episode' in result
    assert result['episode'] == [14, 15, 16, 17]
    assert 'version' in result
    assert result['version'] == 2


def test_matches():
    rebulk = Rebulk()

@@ -144,8 +187,8 @@ def test_matches():
def test_matches_2():
    rebulk = Rebulk() \
        .regex_defaults(flags=re.IGNORECASE) \
        .chain(children=True, formatter={'episode': int}) \
        .defaults(formatter={'version': int}) \
        .defaults(children=True, formatter={'episode': int, 'version': int}) \
        .chain() \
        .regex(r'e(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'[ex-](?P<episode>\d{1,4})').repeater('*') \
@@ -173,25 +216,32 @@ def test_matches_2():
def test_matches_3():
    alt_dash = (r'@', r'[\W_]')  # abbreviation

    rebulk = Rebulk()
    match_names = ['season', 'episode']
    other_names = ['screen_size', 'video_codec', 'audio_codec', 'audio_channels', 'container', 'date']

    rebulk.chain(formatter={'season': int, 'episode': int},
                 tags=['SxxExx'],
                 abbreviations=[alt_dash],
                 private_names=['episodeSeparator', 'seasonSeparator'],
                 children=True,
                 private_parent=True,
                 conflict_solver=lambda match, other: match
                 if match.name in ['season', 'episode'] and other.name in
                 ['screen_size', 'video_codec', 'audio_codec',
                  'audio_channels', 'container', 'date']
                 else '__default__') \
    rebulk = Rebulk()
    rebulk.defaults(formatter={'season': int, 'episode': int},
                    tags=['SxxExx'],
                    abbreviations=[alt_dash],
                    private_names=['episodeSeparator', 'seasonSeparator'],
                    children=True,
                    private_parent=True,
                    conflict_solver=lambda match, other: match
                    if match.name in match_names and other.name in other_names
                    else '__default__')

    rebulk.chain() \
        .defaults(children=True, private_parent=True) \
        .regex(r'(?P<season>\d+)@?x@?(?P<episode>\d+)') \
        .regex(r'(?P<episodeSeparator>x|-|\+|&)(?P<episode>\d+)').repeater('*') \
        .close() \
        .chain() \
        .defaults(children=True, private_parent=True) \
        .regex(r'S(?P<season>\d+)@?(?:xE|Ex|E|x)@?(?P<episode>\d+)') \
        .regex(r'(?:(?P<episodeSeparator>xE|Ex|E|x|-|\+|&)(?P<episode>\d+))').repeater('*') \
        .close() \
        .chain() \
        .defaults(children=True, private_parent=True) \
        .regex(r'S(?P<season>\d+)') \
        .regex(r'(?P<seasonSeparator>S|-|\+|&)(?P<season>\d+)').repeater('*')

@@ -240,11 +290,11 @@ def test_matches_4():

    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
                    validator={'__parent__': seps_surround}, children=True, private_parent=True)
    rebulk.defaults(validate_all=True, children=True)
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], private_parent=True)

    rebulk.chain(formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
    rebulk.chain(validator={'__parent__': seps_surround}, formatter={'episode': int, 'version': int}) \
        .defaults(formatter={'episode': int, 'version': int}) \
        .regex(r'e(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')
@@ -262,11 +312,11 @@ def test_matches_5():

    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
                    validator={'__parent__': seps_surround}, children=True, private_parent=True)

    rebulk.chain(formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
    rebulk.chain(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
                 validator={'__parent__': seps_surround}, children=True, private_parent=True,
                 formatter={'episode': int, 'version': int}) \
        .defaults(children=True, private_parent=True) \
        .regex(r'e(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('{2,3}')
@@ -288,7 +338,7 @@ def test_matches_6():
                    validator=None, children=True, private_parent=True)

    rebulk.chain(formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .defaults(children=True, private_parent=True) \
        .regex(r'e(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('{2,3}')
@@ -2,19 +2,15 @@
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, protected-access, invalid-name, len-as-condition

from .default_rules_module import RuleRemove0
from .. import debug
from ..match import Match
from ..pattern import StringPattern
from ..rebulk import Rebulk
from ..match import Match
from .. import debug
from .default_rules_module import RuleRemove0


class TestDebug(object):


        #request.addfinalizer(disable_debug)

        # request.addfinalizer(disable_debug)

        debug.DEBUG = True
        pattern = StringPattern(1, 3, value="es")
@@ -38,43 +34,43 @@ class TestDebug(object):
        debug.DEBUG = False

    def test_pattern(self):
        assert self.pattern.defined_at.lineno == 20
        assert self.pattern.defined_at.lineno > 0
        assert self.pattern.defined_at.name == 'rebulk.test.test_debug'
        assert self.pattern.defined_at.filename.endswith('test_debug.py')

        assert str(self.pattern.defined_at) == 'test_debug.py#L20'
        assert repr(self.pattern) == '<StringPattern@test_debug.py#L20:(1, 3)>'
        assert str(self.pattern.defined_at).startswith('test_debug.py#L')
        assert repr(self.pattern).startswith('<StringPattern@test_debug.py#L')

    def test_match(self):
        assert self.match.defined_at.lineno == 22
        assert self.match.defined_at.lineno > 0
        assert self.match.defined_at.name == 'rebulk.test.test_debug'
        assert self.match.defined_at.filename.endswith('test_debug.py')

        assert str(self.match.defined_at) == 'test_debug.py#L22'
        assert str(self.match.defined_at).startswith('test_debug.py#L')

    def test_rule(self):
        assert self.rule.defined_at.lineno == 23
        assert self.rule.defined_at.lineno > 0
        assert self.rule.defined_at.name == 'rebulk.test.test_debug'
        assert self.rule.defined_at.filename.endswith('test_debug.py')

        assert str(self.rule.defined_at) == 'test_debug.py#L23'
        assert repr(self.rule) == '<RuleRemove0@test_debug.py#L23>'
        assert str(self.rule.defined_at).startswith('test_debug.py#L')
        assert repr(self.rule).startswith('<RuleRemove0@test_debug.py#L')

    def test_rebulk(self):
        """
        This test fails on travis CI, can't find out why there's 1 line offset ...
        """
        assert self.rebulk._patterns[0].defined_at.lineno in [26, 27]
        assert self.rebulk._patterns[0].defined_at.lineno > 0
        assert self.rebulk._patterns[0].defined_at.name == 'rebulk.test.test_debug'
        assert self.rebulk._patterns[0].defined_at.filename.endswith('test_debug.py')

        assert str(self.rebulk._patterns[0].defined_at) in ['test_debug.py#L26', 'test_debug.py#L27']
        assert str(self.rebulk._patterns[0].defined_at).startswith('test_debug.py#L')

        assert self.rebulk._patterns[1].defined_at.lineno in [27, 28]
        assert self.rebulk._patterns[1].defined_at.lineno > 0
        assert self.rebulk._patterns[1].defined_at.name == 'rebulk.test.test_debug'
        assert self.rebulk._patterns[1].defined_at.filename.endswith('test_debug.py')

        assert str(self.rebulk._patterns[1].defined_at) in ['test_debug.py#L27', 'test_debug.py#L28']
        assert str(self.rebulk._patterns[1].defined_at).startswith('test_debug.py#L')

        assert self.matches[0].defined_at == self.rebulk._patterns[0].defined_at
        assert self.matches[1].defined_at == self.rebulk._patterns[1].defined_at
@@ -116,6 +116,9 @@ class TestMatchesClass(object):
        assert "tag1" in matches.tags
        assert "tag2" in matches.tags

        assert self.match3.tagged("tag1")
        assert not self.match3.tagged("start")

        tag1 = matches.tagged("tag1")
        assert len(tag1) == 2
        assert tag1[0] == self.match2
@@ -62,9 +62,20 @@ def validators(*chained_validators):
    :return:
    :rtype:
    """

    def validator_chain(match):  # pylint:disable=missing-docstring
        for chained_validator in chained_validators:
            if not chained_validator(match):
                return False
        return True

    return validator_chain


def allways_true(match):  # pylint:disable=unused-argument
    """
    A validator which is always true
    :param match:
    :return:
    """
    return True
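`validators()` composes predicates with a short-circuit AND, and `allways_true` is the new default validator. A sketch using plain strings as stand-ins for match objects:

from rebulk.validators import validators, allways_true

# validator_chain accepts a value only if every chained validator does.
check = validators(lambda m: len(m) > 2, lambda m: m.startswith('t'))
assert check('test') is True
assert check('to') is False
assert allways_true(None) is True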