diff --git a/libs/common/bin/chardetect.exe b/libs/common/bin/guessit.exe similarity index 99% rename from libs/common/bin/chardetect.exe rename to libs/common/bin/guessit.exe index f93a4a82..550c3cb3 100644 Binary files a/libs/common/bin/chardetect.exe and b/libs/common/bin/guessit.exe differ diff --git a/libs/common/dateutil/_version.py b/libs/common/dateutil/_version.py index d3ce8561..b723056a 100644 --- a/libs/common/dateutil/_version.py +++ b/libs/common/dateutil/_version.py @@ -1,4 +1,5 @@ # coding: utf-8 # file generated by setuptools_scm # don't change, don't track in version control -version = '2.7.5' +version = '2.8.2' +version_tuple = (2, 8, 2) diff --git a/libs/common/dateutil/easter.py b/libs/common/dateutil/easter.py index 53b7c789..f74d1f74 100644 --- a/libs/common/dateutil/easter.py +++ b/libs/common/dateutil/easter.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- """ -This module offers a generic easter computing method for any given year, using +This module offers a generic Easter computing method for any given year, using Western, Orthodox or Julian algorithms. """ @@ -21,15 +21,15 @@ def easter(year, method=EASTER_WESTERN): quoted in "Explanatory Supplement to the Astronomical Almanac", P. Kenneth Seidelmann, editor. - This algorithm implements three different easter + This algorithm implements three different Easter calculation methods: - 1 - Original calculation in Julian calendar, valid in - dates after 326 AD - 2 - Original method, with date converted to Gregorian - calendar, valid in years 1583 to 4099 - 3 - Revised method, in Gregorian calendar, valid in - years 1583 to 4099 as well + 1. Original calculation in Julian calendar, valid in + dates after 326 AD + 2. Original method, with date converted to Gregorian + calendar, valid in years 1583 to 4099 + 3. Revised method, in Gregorian calendar, valid in + years 1583 to 4099 as well These methods are represented by the constants: diff --git a/libs/common/dateutil/parser/__init__.py b/libs/common/dateutil/parser/__init__.py index 216762c0..d174b0e4 100644 --- a/libs/common/dateutil/parser/__init__.py +++ b/libs/common/dateutil/parser/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -from ._parser import parse, parser, parserinfo +from ._parser import parse, parser, parserinfo, ParserError from ._parser import DEFAULTPARSER, DEFAULTTZPARSER from ._parser import UnknownTimezoneWarning @@ -9,6 +9,7 @@ from .isoparser import isoparser, isoparse __all__ = ['parse', 'parser', 'parserinfo', 'isoparse', 'isoparser', + 'ParserError', 'UnknownTimezoneWarning'] diff --git a/libs/common/dateutil/parser/_parser.py b/libs/common/dateutil/parser/_parser.py index 9d2bb795..37d1663b 100644 --- a/libs/common/dateutil/parser/_parser.py +++ b/libs/common/dateutil/parser/_parser.py @@ -20,11 +20,11 @@ value falls back to the end of the month. Additional resources about date/time string formats can be found below: - `A summary of the international standard date and time notation - `_ -- `W3C Date and Time Formats `_ + `_ +- `W3C Date and Time Formats `_ - `Time Formats (Planetary Rings Node) `_ - `CPAN ParseDate module - `_ + `_ - `Java SimpleDateFormat Class `_ """ @@ -40,7 +40,7 @@ from calendar import monthrange from io import StringIO import six -from six import binary_type, integer_types, text_type +from six import integer_types, text_type from decimal import Decimal @@ -49,7 +49,7 @@ from warnings import warn from .. import relativedelta from .. 
import tz -__all__ = ["parse", "parserinfo"] +__all__ = ["parse", "parserinfo", "ParserError"] # TODO: pandas.core.tools.datetimes imports this explicitly. Might be worth @@ -60,14 +60,8 @@ class _timelex(object): _split_decimal = re.compile("([.,])") def __init__(self, instream): - if six.PY2: - # In Python 2, we can't duck type properly because unicode has - # a 'decode' function, and we'd be double-decoding - if isinstance(instream, (binary_type, bytearray)): - instream = instream.decode() - else: - if getattr(instream, 'decode', None) is not None: - instream = instream.decode() + if isinstance(instream, (bytes, bytearray)): + instream = instream.decode() if isinstance(instream, text_type): instream = StringIO(instream) @@ -291,7 +285,7 @@ class parserinfo(object): ("s", "second", "seconds")] AMPM = [("am", "a"), ("pm", "p")] - UTCZONE = ["UTC", "GMT", "Z"] + UTCZONE = ["UTC", "GMT", "Z", "z"] PERTAIN = ["of"] TZOFFSET = {} # TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate", @@ -388,7 +382,8 @@ class parserinfo(object): if res.year is not None: res.year = self.convertyear(res.year, res.century_specified) - if res.tzoffset == 0 and not res.tzname or res.tzname == 'Z': + if ((res.tzoffset == 0 and not res.tzname) or + (res.tzname == 'Z' or res.tzname == 'z')): res.tzname = "UTC" res.tzoffset = 0 elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname): @@ -422,7 +417,7 @@ class _ymd(list): elif not self.has_month: return 1 <= value <= 31 elif not self.has_year: - # Be permissive, assume leapyear + # Be permissive, assume leap year month = self[self.mstridx] return 1 <= value <= monthrange(2000, month)[1] else: @@ -538,7 +533,7 @@ class _ymd(list): year, month, day = self else: # 01-Jan-01 - # Give precendence to day-first, since + # Give precedence to day-first, since # two-digit years is usually hand-written. day, month, year = self @@ -625,7 +620,7 @@ class parser(object): first element being a :class:`datetime.datetime` object, the second a tuple containing the fuzzy tokens. - :raises ValueError: + :raises ParserError: Raised for invalid or unknown string format, if the provided :class:`tzinfo` is not in a valid format, or if an invalid date would be created. @@ -645,12 +640,15 @@ class parser(object): res, skipped_tokens = self._parse(timestr, **kwargs) if res is None: - raise ValueError("Unknown string format:", timestr) + raise ParserError("Unknown string format: %s", timestr) if len(res) == 0: - raise ValueError("String does not contain a date:", timestr) + raise ParserError("String does not contain a date: %s", timestr) - ret = self._build_naive(res, default) + try: + ret = self._build_naive(res, default) + except ValueError as e: + six.raise_from(ParserError(str(e) + ": %s", timestr), e) if not ignoretz: ret = self._build_tzaware(ret, res, tzinfos) @@ -1021,7 +1019,7 @@ class parser(object): hms_idx = idx + 2 elif idx > 0 and info.hms(tokens[idx-1]) is not None: - # There is a "h", "m", or "s" preceeding this token. Since neither + # There is a "h", "m", or "s" preceding this token. Since neither # of the previous cases was hit, there is no label following this # token, so we use the previous label. # e.g. 
the "04" in "12h04" @@ -1060,7 +1058,8 @@ class parser(object): tzname is None and tzoffset is None and len(token) <= 5 and - all(x in string.ascii_uppercase for x in token)) + (all(x in string.ascii_uppercase for x in token) + or token in self.info.UTCZONE)) def _ampm_valid(self, hour, ampm, fuzzy): """ @@ -1100,7 +1099,7 @@ class parser(object): def _parse_min_sec(self, value): # TODO: Every usage of this function sets res.second to the return # value. Are there any cases where second will be returned as None and - # we *dont* want to set res.second = None? + # we *don't* want to set res.second = None? minute = int(value) second = None @@ -1109,14 +1108,6 @@ class parser(object): second = int(60 * sec_remainder) return (minute, second) - def _parsems(self, value): - """Parse a I[.F] seconds value into (seconds, microseconds).""" - if "." not in value: - return int(value), 0 - else: - i, f = value.split(".") - return int(i), int(f.ljust(6, "0")[:6]) - def _parse_hms(self, idx, tokens, info, hms_idx): # TODO: Is this going to admit a lot of false-positives for when we # just happen to have digits and "h", "m" or "s" characters in non-date @@ -1135,21 +1126,35 @@ class parser(object): return (new_idx, hms) - def _recombine_skipped(self, tokens, skipped_idxs): - """ - >>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"] - >>> skipped_idxs = [0, 1, 2, 5] - >>> _recombine_skipped(tokens, skipped_idxs) - ["foo bar", "baz"] - """ - skipped_tokens = [] - for i, idx in enumerate(sorted(skipped_idxs)): - if i > 0 and idx - 1 == skipped_idxs[i - 1]: - skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx] - else: - skipped_tokens.append(tokens[idx]) + # ------------------------------------------------------------------ + # Handling for individual tokens. These are kept as methods instead + # of functions for the sake of customizability via subclassing. - return skipped_tokens + def _parsems(self, value): + """Parse a I[.F] seconds value into (seconds, microseconds).""" + if "." not in value: + return int(value), 0 + else: + i, f = value.split(".") + return int(i), int(f.ljust(6, "0")[:6]) + + def _to_decimal(self, val): + try: + decimal_value = Decimal(val) + # See GH 662, edge case, infinite value should not be converted + # via `_to_decimal` + if not decimal_value.is_finite(): + raise ValueError("Converted decimal value is infinite or NaN") + except Exception as e: + msg = "Could not convert %s to decimal" % val + six.raise_from(ValueError(msg), e) + else: + return decimal_value + + # ------------------------------------------------------------------ + # Post-Parsing construction of datetime output. These are kept as + # methods instead of functions for the sake of customizability via + # subclassing. 
def _build_tzinfo(self, tzinfos, tzname, tzoffset): if callable(tzinfos): @@ -1164,6 +1169,9 @@ class parser(object): tzinfo = tz.tzstr(tzdata) elif isinstance(tzdata, integer_types): tzinfo = tz.tzoffset(tzname, tzdata) + else: + raise TypeError("Offset must be tzinfo subclass, tz string, " + "or int offset.") return tzinfo def _build_tzaware(self, naive, res, tzinfos): @@ -1181,10 +1189,10 @@ class parser(object): # This is mostly relevant for winter GMT zones parsed in the UK if (aware.tzname() != res.tzname and res.tzname in self.info.UTCZONE): - aware = aware.replace(tzinfo=tz.tzutc()) + aware = aware.replace(tzinfo=tz.UTC) elif res.tzoffset == 0: - aware = naive.replace(tzinfo=tz.tzutc()) + aware = naive.replace(tzinfo=tz.UTC) elif res.tzoffset: aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset)) @@ -1239,17 +1247,21 @@ class parser(object): return dt - def _to_decimal(self, val): - try: - decimal_value = Decimal(val) - # See GH 662, edge case, infinite value should not be converted via `_to_decimal` - if not decimal_value.is_finite(): - raise ValueError("Converted decimal value is infinite or NaN") - except Exception as e: - msg = "Could not convert %s to decimal" % val - six.raise_from(ValueError(msg), e) - else: - return decimal_value + def _recombine_skipped(self, tokens, skipped_idxs): + """ + >>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"] + >>> skipped_idxs = [0, 1, 2, 5] + >>> _recombine_skipped(tokens, skipped_idxs) + ["foo bar", "baz"] + """ + skipped_tokens = [] + for i, idx in enumerate(sorted(skipped_idxs)): + if i > 0 and idx - 1 == skipped_idxs[i - 1]: + skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx] + else: + skipped_tokens.append(tokens[idx]) + + return skipped_tokens DEFAULTPARSER = parser() @@ -1341,10 +1353,10 @@ def parse(timestr, parserinfo=None, **kwargs): first element being a :class:`datetime.datetime` object, the second a tuple containing the fuzzy tokens. - :raises ValueError: - Raised for invalid or unknown string format, if the provided - :class:`tzinfo` is not in a valid format, or if an invalid date - would be created. + :raises ParserError: + Raised for invalid or unknown string formats, if the provided + :class:`tzinfo` is not in a valid format, or if an invalid date would + be created. :raises OverflowError: Raised if the parsed date exceeds the largest valid C integer on @@ -1573,6 +1585,29 @@ DEFAULTTZPARSER = _tzparser() def _parsetz(tzstr): return DEFAULTTZPARSER.parse(tzstr) + +class ParserError(ValueError): + """Exception subclass used for any failure to parse a datetime string. + + This is a subclass of :py:exc:`ValueError`, and should be raised any time + earlier versions of ``dateutil`` would have raised ``ValueError``. + + .. versionadded:: 2.8.1 + """ + def __str__(self): + try: + return self.args[0] % self.args[1:] + except (TypeError, IndexError): + return super(ParserError, self).__str__() + + def __repr__(self): + args = ", ".join("'%s'" % arg for arg in self.args) + return "%s(%s)" % (self.__class__.__name__, args) + + class UnknownTimezoneWarning(RuntimeWarning): - """Raised when the parser finds a timezone it cannot parse into a tzinfo""" + """Raised when the parser finds a timezone it cannot parse into a tzinfo. + + .. 
versionadded:: 2.7.0 + """ # vim:ts=4:sw=4:et diff --git a/libs/common/dateutil/parser/isoparser.py b/libs/common/dateutil/parser/isoparser.py index cd27f93d..5d7bee38 100644 --- a/libs/common/dateutil/parser/isoparser.py +++ b/libs/common/dateutil/parser/isoparser.py @@ -88,10 +88,12 @@ class isoparser(object): - ``hh`` - ``hh:mm`` or ``hhmm`` - ``hh:mm:ss`` or ``hhmmss`` - - ``hh:mm:ss.sss`` or ``hh:mm:ss.ssssss`` (3-6 sub-second digits) + - ``hh:mm:ss.ssssss`` (Up to 6 sub-second digits) Midnight is a special case for `hh`, as the standard supports both - 00:00 and 24:00 as a representation. + 00:00 and 24:00 as a representation. The decimal separator can be + either a dot or a comma. + .. caution:: @@ -137,6 +139,10 @@ class isoparser(object): else: raise ValueError('String contains unknown ISO components') + if len(components) > 3 and components[3] == 24: + components[3] = 0 + return datetime(*components) + timedelta(days=1) + return datetime(*components) @_takes_ascii @@ -153,7 +159,7 @@ class isoparser(object): components, pos = self._parse_isodate(datestr) if pos < len(datestr): raise ValueError('String contains unknown ISO ' + - 'components: {}'.format(datestr)) + 'components: {!r}'.format(datestr.decode('ascii'))) return date(*components) @_takes_ascii @@ -167,7 +173,10 @@ class isoparser(object): :return: Returns a :class:`datetime.time` object """ - return time(*self._parse_isotime(timestr)) + components = self._parse_isotime(timestr) + if components[0] == 24: + components[0] = 0 + return time(*components) @_takes_ascii def parse_tzstr(self, tzstr, zero_as_utc=True): @@ -190,10 +199,9 @@ class isoparser(object): return self._parse_tzstr(tzstr, zero_as_utc=zero_as_utc) # Constants - _MICROSECOND_END_REGEX = re.compile(b'[-+Z]+') _DATE_SEP = b'-' _TIME_SEP = b':' - _MICRO_SEP = b'.' 
+ _FRACTION_REGEX = re.compile(b'[\\.,]([0-9]+)') def _parse_isodate(self, dt_str): try: @@ -325,39 +333,42 @@ class isoparser(object): pos = 0 comp = -1 - if len(timestr) < 2: + if len_str < 2: raise ValueError('ISO time too short') - has_sep = len_str >= 3 and timestr[2:3] == self._TIME_SEP + has_sep = False while pos < len_str and comp < 5: comp += 1 - if timestr[pos:pos + 1] in b'-+Z': + if timestr[pos:pos + 1] in b'-+Zz': # Detect time zone boundary components[-1] = self._parse_tzstr(timestr[pos:]) pos = len_str break + if comp == 1 and timestr[pos:pos+1] == self._TIME_SEP: + has_sep = True + pos += 1 + elif comp == 2 and has_sep: + if timestr[pos:pos+1] != self._TIME_SEP: + raise ValueError('Inconsistent use of colon separator') + pos += 1 + if comp < 3: # Hour, minute, second components[comp] = int(timestr[pos:pos + 2]) pos += 2 - if (has_sep and pos < len_str and - timestr[pos:pos + 1] == self._TIME_SEP): - pos += 1 if comp == 3: - # Microsecond - if timestr[pos:pos + 1] != self._MICRO_SEP: + # Fraction of a second + frac = self._FRACTION_REGEX.match(timestr[pos:]) + if not frac: continue - pos += 1 - us_str = self._MICROSECOND_END_REGEX.split(timestr[pos:pos + 6], - 1)[0] - + us_str = frac.group(1)[:6] # Truncate to microseconds components[comp] = int(us_str) * 10**(6 - len(us_str)) - pos += len(us_str) + pos += len(frac.group()) if pos < len_str: raise ValueError('Unused components in ISO string') @@ -366,13 +377,12 @@ class isoparser(object): # Standard supports 00:00 and 24:00 as representations of midnight if any(component != 0 for component in components[1:4]): raise ValueError('Hour may only be 24 at 24:00:00.000') - components[0] = 0 return components def _parse_tzstr(self, tzstr, zero_as_utc=True): - if tzstr == b'Z': - return tz.tzutc() + if tzstr == b'Z' or tzstr == b'z': + return tz.UTC if len(tzstr) not in {3, 5, 6}: raise ValueError('Time zone offset must be 1, 3, 5 or 6 characters') @@ -391,7 +401,7 @@ class isoparser(object): minutes = int(tzstr[(4 if tzstr[3:4] == self._TIME_SEP else 3):]) if zero_as_utc and hours == 0 and minutes == 0: - return tz.tzutc() + return tz.UTC else: if minutes > 59: raise ValueError('Invalid minutes in time zone offset') diff --git a/libs/common/dateutil/relativedelta.py b/libs/common/dateutil/relativedelta.py index 1e0d6165..a9e85f7e 100644 --- a/libs/common/dateutil/relativedelta.py +++ b/libs/common/dateutil/relativedelta.py @@ -17,8 +17,12 @@ __all__ = ["relativedelta", "MO", "TU", "WE", "TH", "FR", "SA", "SU"] class relativedelta(object): """ - The relativedelta type is based on the specification of the excellent - work done by M.-A. Lemburg in his + The relativedelta type is designed to be applied to an existing datetime and + can replace specific components of that datetime, or represents an interval + of time. + + It is based on the specification of the excellent work done by M.-A. Lemburg + in his `mx.DateTime `_ extension. However, notice that this type does *NOT* implement the same algorithm as his work. Do *NOT* expect it to behave like mx.DateTime's counterpart. @@ -41,17 +45,19 @@ class relativedelta(object): years, months, weeks, days, hours, minutes, seconds, microseconds: Relative information, may be negative (argument is plural); adding or subtracting a relativedelta with relative information performs - the corresponding aritmetic operation on the original datetime value + the corresponding arithmetic operation on the original datetime value with the information in the relativedelta. 
weekday: - One of the weekday instances (MO, TU, etc). These - instances may receive a parameter N, specifying the Nth - weekday, which could be positive or negative (like MO(+1) - or MO(-2). Not specifying it is the same as specifying - +1. You can also use an integer, where 0=MO. Notice that - if the calculated date is already Monday, for example, - using MO(1) or MO(-1) won't change the day. + One of the weekday instances (MO, TU, etc) available in the + relativedelta module. These instances may receive a parameter N, + specifying the Nth weekday, which could be positive or negative + (like MO(+1) or MO(-2)). Not specifying it is the same as specifying + +1. You can also use an integer, where 0=MO. This argument is always + relative e.g. if the calculated date is already Monday, using MO(1) + or MO(-1) won't change the day. To effectively make it absolute, use + it in combination with the day argument (e.g. day=1, MO(1) for first + Monday of the month). leapdays: Will add given days to the date found, if year is a leap @@ -82,9 +88,12 @@ class relativedelta(object): For example + >>> from datetime import datetime + >>> from dateutil.relativedelta import relativedelta, MO >>> dt = datetime(2018, 4, 9, 13, 37, 0) >>> delta = relativedelta(hours=25, day=1, weekday=MO(1)) - datetime(2018, 4, 2, 14, 37, 0) + >>> dt + delta + datetime.datetime(2018, 4, 2, 14, 37) First, the day is set to 1 (the first of the month), then 25 hours are added, to get to the 2nd day and 14th hour, finally the @@ -276,7 +285,7 @@ class relativedelta(object): values for the relative attributes. >>> relativedelta(days=1.5, hours=2).normalized() - relativedelta(days=1, hours=14) + relativedelta(days=+1, hours=+14) :return: Returns a :class:`dateutil.relativedelta.relativedelta` object. diff --git a/libs/common/dateutil/rrule.py b/libs/common/dateutil/rrule.py index 8e9c2af1..b3203393 100644 --- a/libs/common/dateutil/rrule.py +++ b/libs/common/dateutil/rrule.py @@ -5,27 +5,27 @@ the recurrence rules documented in the `iCalendar RFC `_, including support for caching of results. """ -import itertools -import datetime import calendar +import datetime +import heapq +import itertools import re import sys +from functools import wraps +# For warning about deprecation of until and count +from warnings import warn + +from six import advance_iterator, integer_types + +from six.moves import _thread, range + +from ._common import weekday as weekdaybase try: from math import gcd except ImportError: from fractions import gcd -from six import advance_iterator, integer_types -from six.moves import _thread, range -import heapq - -from ._common import weekday as weekdaybase -from .tz import tzutc, tzlocal - -# For warning about deprecation of until and count -from warnings import warn - __all__ = ["rrule", "rruleset", "rrulestr", "YEARLY", "MONTHLY", "WEEKLY", "DAILY", "HOURLY", "MINUTELY", "SECONDLY", @@ -82,6 +82,7 @@ def _invalidates_cache(f): Decorator for rruleset methods which may invalidate the cached length. """ + @wraps(f) def inner_func(self, *args, **kwargs): rv = f(self, *args, **kwargs) self._invalidate_cache() @@ -178,7 +179,7 @@ class rrulebase(object): return False return False - # __len__() introduces a large performance penality. + # __len__() introduces a large performance penalty. def count(self): """ Returns the number of recurrences in this set. It will have go trough the whole recurrence, if this hasn't been done before. 
""" @@ -353,20 +354,26 @@ class rrule(rrulebase): from calendar.firstweekday(), and may be modified by calendar.setfirstweekday(). :param count: - How many occurrences will be generated. + If given, this determines how many occurrences will be generated. .. note:: - As of version 2.5.0, the use of the ``until`` keyword together - with the ``count`` keyword is deprecated per RFC-5545 Sec. 3.3.10. + As of version 2.5.0, the use of the keyword ``until`` in conjunction + with ``count`` is deprecated, to make sure ``dateutil`` is fully + compliant with `RFC-5545 Sec. 3.3.10 `_. Therefore, ``until`` and ``count`` + **must not** occur in the same call to ``rrule``. :param until: - If given, this must be a datetime instance, that will specify the + If given, this must be a datetime instance specifying the upper-bound limit of the recurrence. The last recurrence in the rule is the greatest datetime that is less than or equal to the value specified in the ``until`` parameter. .. note:: - As of version 2.5.0, the use of the ``until`` keyword together - with the ``count`` keyword is deprecated per RFC-5545 Sec. 3.3.10. + As of version 2.5.0, the use of the keyword ``until`` in conjunction + with ``count`` is deprecated, to make sure ``dateutil`` is fully + compliant with `RFC-5545 Sec. 3.3.10 `_. Therefore, ``until`` and ``count`` + **must not** occur in the same call to ``rrule``. :param bysetpos: If given, it must be either an integer, or a sequence of integers, positive or negative. Each given integer will specify an occurrence @@ -429,7 +436,7 @@ class rrule(rrulebase): if not dtstart: if until and until.tzinfo: dtstart = datetime.datetime.now(tz=until.tzinfo).replace(microsecond=0) - else: + else: dtstart = datetime.datetime.now().replace(microsecond=0) elif not isinstance(dtstart, datetime.datetime): dtstart = datetime.datetime.fromordinal(dtstart.toordinal()) @@ -1406,7 +1413,52 @@ class rruleset(rrulebase): self._len = total + + class _rrulestr(object): + """ Parses a string representation of a recurrence rule or set of + recurrence rules. + + :param s: + Required, a string defining one or more recurrence rules. + + :param dtstart: + If given, used as the default recurrence start if not specified in the + rule string. + + :param cache: + If set ``True`` caching of results will be enabled, improving + performance of multiple queries considerably. + + :param unfold: + If set ``True`` indicates that a rule string is split over more + than one line and should be joined before processing. + + :param forceset: + If set ``True`` forces a :class:`dateutil.rrule.rruleset` to + be returned. + + :param compatible: + If set ``True`` forces ``unfold`` and ``forceset`` to be ``True``. + + :param ignoretz: + If set ``True``, time zones in parsed strings are ignored and a naive + :class:`datetime.datetime` object is returned. + + :param tzids: + If given, a callable or mapping used to retrieve a + :class:`datetime.tzinfo` from a string representation. + Defaults to :func:`dateutil.tz.gettz`. + + :param tzinfos: + Additional time zone names / aliases which may be present in a string + representation. See :func:`dateutil.parser.parse` for more + information. 
+ + :return: + Returns a :class:`dateutil.rrule.rruleset` or + :class:`dateutil.rrule.rrule` + """ _freq_map = {"YEARLY": YEARLY, "MONTHLY": MONTHLY, @@ -1508,6 +1560,58 @@ class _rrulestr(object): raise ValueError("invalid '%s': %s" % (name, value)) return rrule(dtstart=dtstart, cache=cache, **rrkwargs) + def _parse_date_value(self, date_value, parms, rule_tzids, + ignoretz, tzids, tzinfos): + global parser + if not parser: + from dateutil import parser + + datevals = [] + value_found = False + TZID = None + + for parm in parms: + if parm.startswith("TZID="): + try: + tzkey = rule_tzids[parm.split('TZID=')[-1]] + except KeyError: + continue + if tzids is None: + from . import tz + tzlookup = tz.gettz + elif callable(tzids): + tzlookup = tzids + else: + tzlookup = getattr(tzids, 'get', None) + if tzlookup is None: + msg = ('tzids must be a callable, mapping, or None, ' + 'not %s' % tzids) + raise ValueError(msg) + + TZID = tzlookup(tzkey) + continue + + # RFC 5445 3.8.2.4: The VALUE parameter is optional, but may be found + # only once. + if parm not in {"VALUE=DATE-TIME", "VALUE=DATE"}: + raise ValueError("unsupported parm: " + parm) + else: + if value_found: + msg = ("Duplicate value parameter found in: " + parm) + raise ValueError(msg) + value_found = True + + for datestr in date_value.split(','): + date = parser.parse(datestr, ignoretz=ignoretz, tzinfos=tzinfos) + if TZID is not None: + if date.tzinfo is None: + date = date.replace(tzinfo=TZID) + else: + raise ValueError('DTSTART/EXDATE specifies multiple timezone') + datevals.append(date) + + return datevals + def _parse_rfc(self, s, dtstart=None, cache=False, @@ -1580,54 +1684,18 @@ class _rrulestr(object): raise ValueError("unsupported EXRULE parm: "+parm) exrulevals.append(value) elif name == "EXDATE": - for parm in parms: - if parm != "VALUE=DATE-TIME": - raise ValueError("unsupported EXDATE parm: "+parm) - exdatevals.append(value) + exdatevals.extend( + self._parse_date_value(value, parms, + TZID_NAMES, ignoretz, + tzids, tzinfos) + ) elif name == "DTSTART": - # RFC 5445 3.8.2.4: The VALUE parameter is optional, but - # may be found only once. - value_found = False - TZID = None - valid_values = {"VALUE=DATE-TIME", "VALUE=DATE"} - for parm in parms: - if parm.startswith("TZID="): - try: - tzkey = TZID_NAMES[parm.split('TZID=')[-1]] - except KeyError: - continue - if tzids is None: - from . 
import tz - tzlookup = tz.gettz - elif callable(tzids): - tzlookup = tzids - else: - tzlookup = getattr(tzids, 'get', None) - if tzlookup is None: - msg = ('tzids must be a callable, ' + - 'mapping, or None, ' + - 'not %s' % tzids) - raise ValueError(msg) - - TZID = tzlookup(tzkey) - continue - if parm not in valid_values: - raise ValueError("unsupported DTSTART parm: "+parm) - else: - if value_found: - msg = ("Duplicate value parameter found in " + - "DTSTART: " + parm) - raise ValueError(msg) - value_found = True - if not parser: - from dateutil import parser - dtstart = parser.parse(value, ignoretz=ignoretz, - tzinfos=tzinfos) - if TZID is not None: - if dtstart.tzinfo is None: - dtstart = dtstart.replace(tzinfo=TZID) - else: - raise ValueError('DTSTART specifies multiple timezones') + dtvals = self._parse_date_value(value, parms, TZID_NAMES, + ignoretz, tzids, tzinfos) + if len(dtvals) != 1: + raise ValueError("Multiple DTSTART values specified:" + + value) + dtstart = dtvals[0] else: raise ValueError("unsupported property: "+name) if (forceset or len(rrulevals) > 1 or rdatevals @@ -1649,10 +1717,7 @@ class _rrulestr(object): ignoretz=ignoretz, tzinfos=tzinfos)) for value in exdatevals: - for datestr in value.split(','): - rset.exdate(parser.parse(datestr, - ignoretz=ignoretz, - tzinfos=tzinfos)) + rset.exdate(value) if compatible and dtstart: rset.rdate(dtstart) return rset diff --git a/libs/common/dateutil/tz/__init__.py b/libs/common/dateutil/tz/__init__.py index 5a2d9cd6..af1352c4 100644 --- a/libs/common/dateutil/tz/__init__.py +++ b/libs/common/dateutil/tz/__init__.py @@ -2,11 +2,6 @@ from .tz import * from .tz import __doc__ -#: Convenience constant providing a :class:`tzutc()` instance -#: -#: .. versionadded:: 2.7.0 -UTC = tzutc() - __all__ = ["tzutc", "tzoffset", "tzlocal", "tzfile", "tzrange", "tzstr", "tzical", "tzwin", "tzwinlocal", "gettz", "enfold", "datetime_ambiguous", "datetime_exists", diff --git a/libs/common/dateutil/tz/_common.py b/libs/common/dateutil/tz/_common.py index ccabb7da..e6ac1183 100644 --- a/libs/common/dateutil/tz/_common.py +++ b/libs/common/dateutil/tz/_common.py @@ -1,4 +1,4 @@ -from six import PY3 +from six import PY2 from functools import wraps @@ -16,14 +16,18 @@ def tzname_in_python2(namefunc): tzname() API changed in Python 3. It used to return bytes, but was changed to unicode strings """ - def adjust_encoding(*args, **kwargs): - name = namefunc(*args, **kwargs) - if name is not None and not PY3: - name = name.encode() + if PY2: + @wraps(namefunc) + def adjust_encoding(*args, **kwargs): + name = namefunc(*args, **kwargs) + if name is not None: + name = name.encode() - return name + return name - return adjust_encoding + return adjust_encoding + else: + return namefunc # The following is adapted from Alexander Belopolsky's tz library @@ -208,7 +212,7 @@ class _tzinfo(tzinfo): Since this is the one time that we *know* we have an unambiguous datetime object, we take this opportunity to determine whether the datetime is ambiguous and in a "fold" state (e.g. if it's the first - occurence, chronologically, of the ambiguous datetime). + occurrence, chronologically, of the ambiguous datetime). :param dt: A timezone-aware :class:`datetime.datetime` object. @@ -246,7 +250,7 @@ class _tzinfo(tzinfo): Since this is the one time that we *know* we have an unambiguous datetime object, we take this opportunity to determine whether the datetime is ambiguous and in a "fold" state (e.g. if it's the first - occurance, chronologically, of the ambiguous datetime). 
+ occurrence, chronologically, of the ambiguous datetime). :param dt: A timezone-aware :class:`datetime.datetime` object. diff --git a/libs/common/dateutil/tz/_factories.py b/libs/common/dateutil/tz/_factories.py index de2e0c1d..f8a65891 100644 --- a/libs/common/dateutil/tz/_factories.py +++ b/libs/common/dateutil/tz/_factories.py @@ -1,4 +1,8 @@ from datetime import timedelta +import weakref +from collections import OrderedDict + +from six.moves import _thread class _TzSingleton(type): @@ -11,6 +15,7 @@ class _TzSingleton(type): cls.__instance = super(_TzSingleton, cls).__call__() return cls.__instance + class _TzFactory(type): def instance(cls, *args, **kwargs): """Alternate constructor that returns a fresh instance""" @@ -19,7 +24,11 @@ class _TzFactory(type): class _TzOffsetFactory(_TzFactory): def __init__(cls, *args, **kwargs): - cls.__instances = {} + cls.__instances = weakref.WeakValueDictionary() + cls.__strong_cache = OrderedDict() + cls.__strong_cache_size = 8 + + cls._cache_lock = _thread.allocate_lock() def __call__(cls, name, offset): if isinstance(offset, timedelta): @@ -31,12 +40,25 @@ class _TzOffsetFactory(_TzFactory): if instance is None: instance = cls.__instances.setdefault(key, cls.instance(name, offset)) + + # This lock may not be necessary in Python 3. See GH issue #901 + with cls._cache_lock: + cls.__strong_cache[key] = cls.__strong_cache.pop(key, instance) + + # Remove an item if the strong cache is overpopulated + if len(cls.__strong_cache) > cls.__strong_cache_size: + cls.__strong_cache.popitem(last=False) + return instance class _TzStrFactory(_TzFactory): def __init__(cls, *args, **kwargs): - cls.__instances = {} + cls.__instances = weakref.WeakValueDictionary() + cls.__strong_cache = OrderedDict() + cls.__strong_cache_size = 8 + + cls.__cache_lock = _thread.allocate_lock() def __call__(cls, s, posix_offset=False): key = (s, posix_offset) @@ -45,5 +67,14 @@ class _TzStrFactory(_TzFactory): if instance is None: instance = cls.__instances.setdefault(key, cls.instance(s, posix_offset)) + + # This lock may not be necessary in Python 3. See GH issue #901 + with cls.__cache_lock: + cls.__strong_cache[key] = cls.__strong_cache.pop(key, instance) + + # Remove an item if the strong cache is overpopulated + if len(cls.__strong_cache) > cls.__strong_cache_size: + cls.__strong_cache.popitem(last=False) + return instance diff --git a/libs/common/dateutil/tz/tz.py b/libs/common/dateutil/tz/tz.py index ac82b9c8..c67f56d4 100644 --- a/libs/common/dateutil/tz/tz.py +++ b/libs/common/dateutil/tz/tz.py @@ -13,6 +13,8 @@ import time import sys import os import bisect +import weakref +from collections import OrderedDict import six from six import string_types @@ -28,6 +30,9 @@ try: except ImportError: tzwin = tzwinlocal = None +# For warning about rounding tzinfo +from warnings import warn + ZERO = datetime.timedelta(0) EPOCH = datetime.datetime.utcfromtimestamp(0) EPOCHORDINAL = EPOCH.toordinal() @@ -118,6 +123,12 @@ class tzutc(datetime.tzinfo): __reduce__ = object.__reduce__ +#: Convenience constant providing a :class:`tzutc()` instance +#: +#: .. 
versionadded:: 2.7.0 +UTC = tzutc() + + @six.add_metaclass(_TzOffsetFactory) class tzoffset(datetime.tzinfo): """ @@ -137,7 +148,8 @@ class tzoffset(datetime.tzinfo): offset = offset.total_seconds() except (TypeError, AttributeError): pass - self._offset = datetime.timedelta(seconds=offset) + + self._offset = datetime.timedelta(seconds=_get_supported_offset(offset)) def utcoffset(self, dt): return self._offset @@ -373,7 +385,7 @@ class _tzfile(object): class tzfile(_tzinfo): """ - This is a ``tzinfo`` subclass thant allows one to use the ``tzfile(5)`` + This is a ``tzinfo`` subclass that allows one to use the ``tzfile(5)`` format timezone files to extract current and historical zone information. :param fileobj: @@ -460,7 +472,7 @@ class tzfile(_tzinfo): if fileobj is not None: if not file_opened_here: - fileobj = _ContextWrapper(fileobj) + fileobj = _nullcontext(fileobj) with fileobj as file_stream: tzobj = self._read_tzfile(file_stream) @@ -600,10 +612,7 @@ class tzfile(_tzinfo): out.ttinfo_list = [] for i in range(typecnt): gmtoff, isdst, abbrind = ttinfo[i] - # Round to full-minutes if that's not the case. Python's - # datetime doesn't accept sub-minute timezones. Check - # http://python.org/sf/1447945 for some information. - gmtoff = 60 * ((gmtoff + 30) // 60) + gmtoff = _get_supported_offset(gmtoff) tti = _ttinfo() tti.offset = gmtoff tti.dstoffset = datetime.timedelta(0) @@ -655,37 +664,44 @@ class tzfile(_tzinfo): # isgmt are off, so it should be in wall time. OTOH, it's # always in gmt time. Let me know if you have comments # about this. - laststdoffset = None + lastdst = None + lastoffset = None + lastdstoffset = None + lastbaseoffset = None out.trans_list = [] + for i, tti in enumerate(out.trans_idx): - if not tti.isdst: - offset = tti.offset - laststdoffset = offset - else: - if laststdoffset is not None: - # Store the DST offset as well and update it in the list - tti.dstoffset = tti.offset - laststdoffset - out.trans_idx[i] = tti + offset = tti.offset + dstoffset = 0 - offset = laststdoffset or 0 + if lastdst is not None: + if tti.isdst: + if not lastdst: + dstoffset = offset - lastoffset - out.trans_list.append(out.trans_list_utc[i] + offset) + if not dstoffset and lastdstoffset: + dstoffset = lastdstoffset - # In case we missed any DST offsets on the way in for some reason, make - # a second pass over the list, looking for the /next/ DST offset. - laststdoffset = None - for i in reversed(range(len(out.trans_idx))): - tti = out.trans_idx[i] - if tti.isdst: - if not (tti.dstoffset or laststdoffset is None): - tti.dstoffset = tti.offset - laststdoffset - else: - laststdoffset = tti.offset + tti.dstoffset = datetime.timedelta(seconds=dstoffset) + lastdstoffset = dstoffset - if not isinstance(tti.dstoffset, datetime.timedelta): - tti.dstoffset = datetime.timedelta(seconds=tti.dstoffset) + # If a time zone changes its base offset during a DST transition, + # then you need to adjust by the previous base offset to get the + # transition time in local time. Otherwise you use the current + # base offset. Ideally, I would have some mathematical proof of + # why this is true, but I haven't really thought about it enough. 
+ baseoffset = offset - dstoffset + adjustment = baseoffset + if (lastbaseoffset is not None and baseoffset != lastbaseoffset + and tti.isdst != lastdst): + # The base DST has changed + adjustment = lastbaseoffset - out.trans_idx[i] = tti + lastdst = tti.isdst + lastoffset = offset + lastbaseoffset = baseoffset + + out.trans_list.append(out.trans_list_utc[i] + adjustment) out.trans_idx = tuple(out.trans_idx) out.trans_list = tuple(out.trans_list) @@ -1255,7 +1271,7 @@ class tzical(object): fileobj = open(fileobj, 'r') else: self._s = getattr(fileobj, 'name', repr(fileobj)) - fileobj = _ContextWrapper(fileobj) + fileobj = _nullcontext(fileobj) self._vtz = {} @@ -1528,7 +1544,9 @@ def __get_gettz(): """ def __init__(self): - self.__instances = {} + self.__instances = weakref.WeakValueDictionary() + self.__strong_cache_size = 8 + self.__strong_cache = OrderedDict() self._cache_lock = _thread.allocate_lock() def __call__(self, name=None): @@ -1537,17 +1555,37 @@ def __get_gettz(): if rv is None: rv = self.nocache(name=name) - if not (name is None or isinstance(rv, tzlocal_classes)): + if not (name is None + or isinstance(rv, tzlocal_classes) + or rv is None): # tzlocal is slightly more complicated than the other # time zone providers because it depends on environment # at construction time, so don't cache that. + # + # We also cannot store weak references to None, so we + # will also not store that. self.__instances[name] = rv + else: + # No need for strong caching, return immediately + return rv + + self.__strong_cache[name] = self.__strong_cache.pop(name, rv) + + if len(self.__strong_cache) > self.__strong_cache_size: + self.__strong_cache.popitem(last=False) return rv + def set_cache_size(self, size): + with self._cache_lock: + self.__strong_cache_size = size + while len(self.__strong_cache) > size: + self.__strong_cache.popitem(last=False) + def cache_clear(self): with self._cache_lock: - self.__instances = {} + self.__instances = weakref.WeakValueDictionary() + self.__strong_cache.clear() @staticmethod def nocache(name=None): @@ -1558,7 +1596,7 @@ def __get_gettz(): name = os.environ["TZ"] except KeyError: pass - if name is None or name == ":": + if name is None or name in ("", ":"): for filepath in TZFILES: if not os.path.isabs(filepath): filename = filepath @@ -1577,8 +1615,15 @@ def __get_gettz(): else: tz = tzlocal() else: - if name.startswith(":"): - name = name[1:] + try: + if name.startswith(":"): + name = name[1:] + except TypeError as e: + if isinstance(name, bytes): + new_msg = "gettz argument should be str, not bytes" + six.raise_from(TypeError(new_msg), e) + else: + raise if os.path.isabs(name): if os.path.isfile(name): tz = tzfile(name) @@ -1601,7 +1646,8 @@ def __get_gettz(): if tzwin is not None: try: tz = tzwin(name) - except WindowsError: + except (WindowsError, UnicodeEncodeError): + # UnicodeEncodeError is for Python 2.7 compat tz = None if not tz: @@ -1622,7 +1668,7 @@ def __get_gettz(): break else: if name in ("GMT", "UTC"): - tz = tzutc() + tz = UTC elif name in time.tzname: tz = tzlocal() return tz @@ -1662,7 +1708,7 @@ def datetime_exists(dt, tz=None): # This is essentially a test of whether or not the datetime can survive # a round trip to UTC. 
- dt_rt = dt.replace(tzinfo=tz).astimezone(tzutc()).astimezone(tz) + dt_rt = dt.replace(tzinfo=tz).astimezone(UTC).astimezone(tz) dt_rt = dt_rt.replace(tzinfo=None) return dt == dt_rt @@ -1768,18 +1814,36 @@ def _datetime_to_timestamp(dt): return (dt.replace(tzinfo=None) - EPOCH).total_seconds() -class _ContextWrapper(object): - """ - Class for wrapping contexts so that they are passed through in a - with statement. - """ - def __init__(self, context): - self.context = context +if sys.version_info >= (3, 6): + def _get_supported_offset(second_offset): + return second_offset +else: + def _get_supported_offset(second_offset): + # For python pre-3.6, round to full-minutes if that's not the case. + # Python's datetime doesn't accept sub-minute timezones. Check + # http://python.org/sf/1447945 or https://bugs.python.org/issue5288 + # for some information. + old_offset = second_offset + calculated_offset = 60 * ((second_offset + 30) // 60) + return calculated_offset - def __enter__(self): - return self.context - def __exit__(*args, **kwargs): - pass +try: + # Python 3.7 feature + from contextlib import nullcontext as _nullcontext +except ImportError: + class _nullcontext(object): + """ + Class for wrapping contexts so that they are passed through in a + with statement. + """ + def __init__(self, context): + self.context = context + + def __enter__(self): + return self.context + + def __exit__(*args, **kwargs): + pass # vim:ts=4:sw=4:et diff --git a/libs/common/dateutil/tz/win.py b/libs/common/dateutil/tz/win.py index def4353a..cde07ba7 100644 --- a/libs/common/dateutil/tz/win.py +++ b/libs/common/dateutil/tz/win.py @@ -1,3 +1,11 @@ +# -*- coding: utf-8 -*- +""" +This module provides an interface to the native time zone data on Windows, +including :py:class:`datetime.tzinfo` implementations. + +Attempting to import this module on a non-Windows platform will raise an +:py:obj:`ImportError`. +""" # This code was originally contributed by Jeffrey Harris. import datetime import struct @@ -39,7 +47,7 @@ TZKEYNAME = _settzkeyname() class tzres(object): """ - Class for accessing `tzres.dll`, which contains timezone name related + Class for accessing ``tzres.dll``, which contains timezone name related resources. .. versionadded:: 2.5.0 @@ -72,9 +80,10 @@ class tzres(object): :param offset: A positive integer value referring to a string from the tzres dll. - ..note: + .. note:: + Offsets found in the registry are generally of the form - `@tzres.dll,-114`. The offset in this case if 114, not -114. + ``@tzres.dll,-114``. The offset in this case is 114, not -114. """ resource = self.p_wchar() @@ -146,6 +155,9 @@ class tzwinbase(tzrangebase): return result def display(self): + """ + Return the display name of the time zone. + """ return self._display def transitions(self, year): @@ -188,6 +200,17 @@ class tzwinbase(tzrangebase): class tzwin(tzwinbase): + """ + Time zone object created from the zone info in the Windows registry + + These are similar to :py:class:`dateutil.tz.tzrange` objects in that + the time zone data is provided in the format of a single offset rule + for either 0 or 2 time zone transitions per year. + + :param: name + The name of a Windows time zone key, e.g. "Eastern Standard Time". + The full list of keys can be retrieved with :func:`tzwin.list`. 
+ """ def __init__(self, name): self._name = name @@ -234,6 +257,22 @@ class tzwin(tzwinbase): class tzwinlocal(tzwinbase): + """ + Class representing the local time zone information in the Windows registry + + While :class:`dateutil.tz.tzlocal` makes system calls (via the :mod:`time` + module) to retrieve time zone information, ``tzwinlocal`` retrieves the + rules directly from the Windows registry and creates an object like + :class:`dateutil.tz.tzwin`. + + Because Windows does not have an equivalent of :func:`time.tzset`, on + Windows, :class:`dateutil.tz.tzlocal` instances will always reflect the + time zone settings *at the time that the process was started*, meaning + changes to the machine's time zone settings during the run of a program + on Windows will **not** be reflected by :class:`dateutil.tz.tzlocal`. + Because ``tzwinlocal`` reads the registry directly, it is unaffected by + this issue. + """ def __init__(self): with winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE) as handle: with winreg.OpenKey(handle, TZLOCALKEYNAME) as tzlocalkey: diff --git a/libs/common/dateutil/utils.py b/libs/common/dateutil/utils.py index ebcce6aa..dd2d245a 100644 --- a/libs/common/dateutil/utils.py +++ b/libs/common/dateutil/utils.py @@ -28,7 +28,7 @@ def today(tzinfo=None): def default_tzinfo(dt, tzinfo): """ - Sets the the ``tzinfo`` parameter on naive datetimes only + Sets the ``tzinfo`` parameter on naive datetimes only This is useful for example when you are provided a datetime that may have either an implicit or explicit time zone, such as when parsing a time zone @@ -63,7 +63,7 @@ def default_tzinfo(dt, tzinfo): def within_delta(dt1, dt2, delta): """ - Useful for comparing two datetimes that may a negilible difference + Useful for comparing two datetimes that may have a negligible difference to be considered equal. """ delta = abs(delta) diff --git a/libs/common/dateutil/zoneinfo/dateutil-zoneinfo.tar.gz b/libs/common/dateutil/zoneinfo/dateutil-zoneinfo.tar.gz index 6e8c05ef..524c48e1 100644 Binary files a/libs/common/dateutil/zoneinfo/dateutil-zoneinfo.tar.gz and b/libs/common/dateutil/zoneinfo/dateutil-zoneinfo.tar.gz differ diff --git a/libs/common/dateutil/zoneinfo/rebuild.py b/libs/common/dateutil/zoneinfo/rebuild.py index 78f0d1a0..684c6586 100644 --- a/libs/common/dateutil/zoneinfo/rebuild.py +++ b/libs/common/dateutil/zoneinfo/rebuild.py @@ -3,7 +3,7 @@ import os import tempfile import shutil import json -from subprocess import check_call +from subprocess import check_call, check_output from tarfile import TarFile from dateutil.zoneinfo import METADATA_FN, ZONEFILENAME @@ -23,11 +23,9 @@ def rebuild(filename, tag=None, format="gz", zonegroups=[], metadata=None): for name in zonegroups: tf.extract(name, tmpdir) filepaths = [os.path.join(tmpdir, n) for n in zonegroups] - try: - check_call(["zic", "-d", zonedir] + filepaths) - except OSError as e: - _print_on_nosuchfile(e) - raise + + _run_zic(zonedir, filepaths) + # write metadata file with open(os.path.join(zonedir, METADATA_FN), 'w') as f: json.dump(metadata, f, indent=4, sort_keys=True) @@ -40,6 +38,30 @@ def rebuild(filename, tag=None, format="gz", zonegroups=[], metadata=None): shutil.rmtree(tmpdir) +def _run_zic(zonedir, filepaths): + """Calls the ``zic`` compiler in a compatible way to get a "fat" binary. + + Recent versions of ``zic`` default to ``-b slim``, while older versions + don't even have the ``-b`` option (but default to "fat" binaries). 
The + current version of dateutil does not support Version 2+ TZif files, which + causes problems when used in conjunction with "slim" binaries, so this + function is used to ensure that we always get a "fat" binary. + """ + + try: + help_text = check_output(["zic", "--help"]) + except OSError as e: + _print_on_nosuchfile(e) + raise + + if b"-b " in help_text: + bloat_args = ["-b", "fat"] + else: + bloat_args = [] + + check_call(["zic"] + bloat_args + ["-d", zonedir] + filepaths) + + def _print_on_nosuchfile(e): """Print helpful troubleshooting message diff --git a/libs/common/guessit/__main__.py b/libs/common/guessit/__main__.py index 9894180a..fad196d6 100644 --- a/libs/common/guessit/__main__.py +++ b/libs/common/guessit/__main__.py @@ -142,7 +142,7 @@ def main(args=None): # pylint:disable=too-many-branches if options.get('yaml'): try: - import yaml # pylint:disable=unused-variable + import yaml # pylint:disable=unused-variable,unused-import except ImportError: # pragma: no cover del options['yaml'] print('PyYAML is not installed. \'--yaml\' option will be ignored ...', file=sys.stderr) diff --git a/libs/common/guessit/__version__.py b/libs/common/guessit/__version__.py index f5913166..e505897b 100644 --- a/libs/common/guessit/__version__.py +++ b/libs/common/guessit/__version__.py @@ -4,4 +4,4 @@ Version module """ # pragma: no cover -__version__ = '3.0.3' +__version__ = '3.1.1' diff --git a/libs/common/guessit/api.py b/libs/common/guessit/api.py index 3bf5aae2..8e306340 100644 --- a/libs/common/guessit/api.py +++ b/libs/common/guessit/api.py @@ -82,6 +82,19 @@ def properties(options=None): return default_api.properties(options) +def suggested_expected(titles, options=None): + """ + Return a list of suggested titles to be used as `expected_title` based on the list of titles + :param titles: the filename or release name + :type titles: list|set|dict + :param options: + :type options: str|dict + :return: + :rtype: list of str + """ + return default_api.suggested_expected(titles, options) + + class GuessItApi(object): """ An api class that can be configured with custom Rebulk configuration. 
@@ -228,5 +241,23 @@ class GuessItApi(object): ordered = self.rebulk.customize_properties(ordered) return ordered + def suggested_expected(self, titles, options=None): + """ + Return a list of suggested titles to be used as `expected_title` based on the list of titles + :param titles: the filename or release name + :type titles: list|set|dict + :param options: + :type options: str|dict + :return: + :rtype: list of str + """ + suggested = [] + for title in titles: + guess = self.guessit(title, options) + if len(guess) != 2 or 'title' not in guess: + suggested.append(title) + + return suggested + default_api = GuessItApi() diff --git a/libs/common/guessit/backports.py b/libs/common/guessit/backports.py index 3e94e27a..c149a6b5 100644 --- a/libs/common/guessit/backports.py +++ b/libs/common/guessit/backports.py @@ -4,7 +4,7 @@ Backports """ # pragma: no-cover -# pylint: disabled +# pylint: skip-file def cmp_to_key(mycmp): """functools.cmp_to_key backport""" diff --git a/libs/common/guessit/config/options.json b/libs/common/guessit/config/options.json index 11b17271..da7c7030 100644 --- a/libs/common/guessit/config/options.json +++ b/libs/common/guessit/config/options.json @@ -1,18 +1,19 @@ { "expected_title": [ - "OSS 117" + "OSS 117", + "This is Us" ], "allowed_countries": [ "au", - "us", - "gb" + "gb", + "us" ], "allowed_languages": [ + "ca", + "cs", "de", "en", "es", - "ca", - "cs", "fr", "he", "hi", @@ -20,7 +21,9 @@ "it", "ja", "ko", + "mul", "nl", + "no", "pl", "pt", "ro", @@ -28,18 +31,50 @@ "sv", "te", "uk", - "mul", "und" ], "advanced_config": { "common_words": [ + "ca", + "cat", "de", - "it" + "he", + "it", + "no", + "por", + "rum", + "se", + "st", + "sub" ], "groups": { "starting": "([{", "ending": ")]}" }, + "audio_codec": { + "audio_channels": { + "1.0": [ + "1ch", + "mono" + ], + "2.0": [ + "2ch", + "stereo", + "re:(2[\\W_]0(?:ch)?)(?=[^\\d]|$)" + ], + "5.1": [ + "5ch", + "6ch", + "re:(5[\\W_][01](?:ch)?)(?=[^\\d]|$)", + "re:(6[\\W_]0(?:ch)?)(?=[^\\d]|$)" + ], + "7.1": [ + "7ch", + "8ch", + "re:(7[\\W_][01](?:ch)?)(?=[^\\d]|$)" + ] + } + }, "container": { "subtitles": [ "srt", @@ -59,9 +94,10 @@ "avi", "divx", "flv", - "mk3d", + "iso", "m4v", "mk2", + "mk3d", "mka", "mkv", "mov", @@ -77,12 +113,11 @@ "ram", "rm", "ts", + "vob", "wav", "webm", "wma", - "wmv", - "iso", - "vob" + "wmv" ], "torrent": [ "torrent" @@ -255,7 +290,6 @@ ], "subtitle_prefixes": [ "st", - "v", "vost", "subforced", "fansub", @@ -297,12 +331,12 @@ }, "release_group": { "forbidden_names": [ - "rip", + "bonus", "by", "for", "par", "pour", - "bonus" + "rip" ], "ignored_seps": "[]{}()" }, @@ -311,6 +345,7 @@ "23.976", "24", "25", + "29.970", "30", "48", "50", @@ -329,6 +364,7 @@ "progressive": [ "360", "480", + "540", "576", "900", "1080", @@ -342,8 +378,8 @@ "website": { "safe_tlds": [ "com", - "org", - "net" + "net", + "org" ], "safe_subdomains": [ "www" @@ -351,12 +387,200 @@ "safe_prefixes": [ "co", "com", - "org", - "net" + "net", + "org" ], "prefixes": [ "from" ] + }, + "streaming_service": { + "A&E": [ + "AE", + "A&E" + ], + "ABC": "AMBC", + "ABC Australia": "AUBC", + "Al Jazeera English": "AJAZ", + "AMC": "AMC", + "Amazon Prime": [ + "AMZN", + "Amazon", + "re:Amazon-?Prime" + ], + "Adult Swim": [ + "AS", + "re:Adult-?Swim" + ], + "America's Test Kitchen": "ATK", + "Animal Planet": "ANPL", + "AnimeLab": "ANLB", + "AOL": "AOL", + "ARD": "ARD", + "BBC iPlayer": [ + "iP", + "re:BBC-?iPlayer" + ], + "BravoTV": "BRAV", + "Canal+": "CNLP", + "Cartoon Network": "CN", + "CBC": "CBC", + "CBS": "CBS", + "CNBC": 
"CNBC", + "Comedy Central": [ + "CC", + "re:Comedy-?Central" + ], + "Channel 4": "4OD", + "CHRGD": "CHGD", + "Cinemax": "CMAX", + "Country Music Television": "CMT", + "Comedians in Cars Getting Coffee": "CCGC", + "Crunchy Roll": [ + "CR", + "re:Crunchy-?Roll" + ], + "Crackle": "CRKL", + "CSpan": "CSPN", + "CTV": "CTV", + "CuriosityStream": "CUR", + "CWSeed": "CWS", + "Daisuki": "DSKI", + "DC Universe": "DCU", + "Deadhouse Films": "DHF", + "DramaFever": [ + "DF", + "DramaFever" + ], + "Digiturk Diledigin Yerde": "DDY", + "Discovery": [ + "DISC", + "Discovery" + ], + "Disney": [ + "DSNY", + "Disney" + ], + "DIY Network": "DIY", + "Doc Club": "DOCC", + "DPlay": "DPLY", + "E!": "ETV", + "ePix": "EPIX", + "El Trece": "ETTV", + "ESPN": "ESPN", + "Esquire": "ESQ", + "Family": "FAM", + "Family Jr": "FJR", + "Food Network": "FOOD", + "Fox": "FOX", + "Freeform": "FREE", + "FYI Network": "FYI", + "Global": "GLBL", + "GloboSat Play": "GLOB", + "Hallmark": "HLMK", + "HBO Go": [ + "HBO", + "re:HBO-?Go" + ], + "HGTV": "HGTV", + "History": [ + "HIST", + "History" + ], + "Hulu": "HULU", + "Investigation Discovery": "ID", + "IFC": "IFC", + "iTunes": "iTunes", + "ITV": "ITV", + "Knowledge Network": "KNOW", + "Lifetime": "LIFE", + "Motor Trend OnDemand": "MTOD", + "MBC": [ + "MBC", + "MBCVOD" + ], + "MSNBC": "MNBC", + "MTV": "MTV", + "National Geographic": [ + "NATG", + "re:National-?Geographic" + ], + "NBA TV": [ + "NBA", + "re:NBA-?TV" + ], + "NBC": "NBC", + "Netflix": [ + "NF", + "Netflix" + ], + "NFL": "NFL", + "NFL Now": "NFLN", + "NHL GameCenter": "GC", + "Nickelodeon": [ + "NICK", + "Nickelodeon" + ], + "Norsk Rikskringkasting": "NRK", + "OnDemandKorea": [ + "ODK", + "OnDemandKorea" + ], + "PBS": "PBS", + "PBS Kids": "PBSK", + "Playstation Network": "PSN", + "Pluzz": "PLUZ", + "RTE One": "RTE", + "SBS (AU)": "SBS", + "SeeSo": [ + "SESO", + "SeeSo" + ], + "Shomi": "SHMI", + "Spike": "SPIK", + "Spike TV": [ + "SPKE", + "re:Spike-?TV" + ], + "Sportsnet": "SNET", + "Sprout": "SPRT", + "Stan": "STAN", + "Starz": "STZ", + "Sveriges Television": "SVT", + "SwearNet": "SWER", + "Syfy": "SYFY", + "TBS": "TBS", + "TFou": "TFOU", + "The CW": [ + "CW", + "re:The-?CW" + ], + "TLC": "TLC", + "TubiTV": "TUBI", + "TV3 Ireland": "TV3", + "TV4 Sweeden": "TV4", + "TVING": "TVING", + "TV Land": [ + "TVL", + "re:TV-?Land" + ], + "UFC": "UFC", + "UKTV": "UKTV", + "Univision": "UNIV", + "USA Network": "USAN", + "Velocity": "VLCT", + "VH1": "VH1", + "Viceland": "VICE", + "Viki": "VIKI", + "Vimeo": "VMEO", + "VRV": "VRV", + "W Network": "WNET", + "WatchMe": "WME", + "WWE Network": "WWEN", + "Xbox Video": "XBOX", + "Yahoo": "YHOO", + "YouTube Red": "RED", + "ZDF": "ZDF" } } -} \ No newline at end of file +} diff --git a/libs/common/guessit/options.py b/libs/common/guessit/options.py index 50ee4235..8fa6825c 100644 --- a/libs/common/guessit/options.py +++ b/libs/common/guessit/options.py @@ -128,7 +128,7 @@ class ConfigurationException(Exception): """ Exception related to configuration file. 
""" - pass + pass # pylint:disable=unnecessary-pass def load_config(options): @@ -153,7 +153,7 @@ def load_config(options): cwd = os.getcwd() yaml_supported = False try: - import yaml # pylint: disable=unused-variable + import yaml # pylint:disable=unused-variable,unused-import yaml_supported = True except ImportError: pass @@ -252,7 +252,7 @@ def load_config_file(filepath): try: import yaml with open(filepath) as config_file_data: - return yaml.load(config_file_data) + return yaml.load(config_file_data, yaml.SafeLoader) except ImportError: # pragma: no cover raise ConfigurationException('Configuration file extension is not supported. ' 'PyYAML should be installed to support "%s" file' % ( diff --git a/libs/common/guessit/rules/common/formatters.py b/libs/common/guessit/rules/common/formatters.py index 434c20be..2a64dee9 100644 --- a/libs/common/guessit/rules/common/formatters.py +++ b/libs/common/guessit/rules/common/formatters.py @@ -25,7 +25,7 @@ def _potential_before(i, input_string): :return: :rtype: bool """ - return i - 2 >= 0 and input_string[i] in seps and input_string[i - 2] in seps and input_string[i - 1] not in seps + return i - 1 >= 0 and input_string[i] in seps and input_string[i - 2] in seps and input_string[i - 1] not in seps def _potential_after(i, input_string): diff --git a/libs/common/guessit/rules/common/validators.py b/libs/common/guessit/rules/common/validators.py index 0e79b989..0d0eb3eb 100644 --- a/libs/common/guessit/rules/common/validators.py +++ b/libs/common/guessit/rules/common/validators.py @@ -28,7 +28,7 @@ def int_coercable(string): return False -def compose(*validators): +def and_(*validators): """ Compose validators functions :param validators: @@ -49,3 +49,26 @@ def compose(*validators): return False return True return composed + + +def or_(*validators): + """ + Compose validators functions + :param validators: + :type validators: + :return: + :rtype: + """ + def composed(string): + """ + Composed validators function + :param string: + :type string: + :return: + :rtype: + """ + for validator in validators: + if validator(string): + return True + return False + return composed diff --git a/libs/common/guessit/rules/match_processors.py b/libs/common/guessit/rules/match_processors.py new file mode 100644 index 00000000..0b49372f --- /dev/null +++ b/libs/common/guessit/rules/match_processors.py @@ -0,0 +1,20 @@ +""" +Match processors +""" +from guessit.rules.common import seps + + +def strip(match, chars=seps): + """ + Strip given characters from match. 
+ + :param chars: + :param match: + :return: + """ + while match.input_string[match.start] in chars: + match.start += 1 + while match.input_string[match.end - 1] in chars: + match.end -= 1 + if not match: + return False diff --git a/libs/common/guessit/rules/processors.py b/libs/common/guessit/rules/processors.py index cced26a5..5b018140 100644 --- a/libs/common/guessit/rules/processors.py +++ b/libs/common/guessit/rules/processors.py @@ -34,7 +34,9 @@ class EnlargeGroupMatches(CustomRule): for match in matches.ending(group.end - 1): ending.append(match) - return starting, ending + if starting or ending: + return starting, ending + return False def then(self, matches, when_response, context): starting, ending = when_response diff --git a/libs/common/guessit/rules/properties/audio_codec.py b/libs/common/guessit/rules/properties/audio_codec.py index a2566bce..815caff9 100644 --- a/libs/common/guessit/rules/properties/audio_codec.py +++ b/libs/common/guessit/rules/properties/audio_codec.py @@ -3,9 +3,8 @@ """ audio_codec, audio_profile and audio_channels property """ -from rebulk.remodule import re - from rebulk import Rebulk, Rule, RemoveMatch +from rebulk.remodule import re from ..common import dash from ..common.pattern import is_disabled @@ -23,7 +22,9 @@ def audio_codec(config): # pylint:disable=unused-argument :return: Created Rebulk object :rtype: Rebulk """ - rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True) + rebulk = Rebulk()\ + .regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])\ + .string_defaults(ignore_case=True) def audio_codec_priority(match1, match2): """ @@ -61,7 +62,9 @@ def audio_codec(config): # pylint:disable=unused-argument rebulk.string('PCM', value='PCM') rebulk.string('LPCM', value='LPCM') - rebulk.defaults(name='audio_profile', disabled=lambda context: is_disabled(context, 'audio_profile')) + rebulk.defaults(clear=True, + name='audio_profile', + disabled=lambda context: is_disabled(context, 'audio_profile')) rebulk.string('MA', value='Master Audio', tags=['audio_profile.rule', 'DTS-HD']) rebulk.string('HR', 'HRA', value='High Resolution Audio', tags=['audio_profile.rule', 'DTS-HD']) rebulk.string('ES', value='Extended Surround', tags=['audio_profile.rule', 'DTS']) @@ -70,17 +73,19 @@ def audio_codec(config): # pylint:disable=unused-argument rebulk.string('HQ', value='High Quality', tags=['audio_profile.rule', 'Dolby Digital']) rebulk.string('EX', value='EX', tags=['audio_profile.rule', 'Dolby Digital']) - rebulk.defaults(name="audio_channels", disabled=lambda context: is_disabled(context, 'audio_channels')) - rebulk.regex(r'(7[\W_][01](?:ch)?)(?=[^\d]|$)', value='7.1', children=True) - rebulk.regex(r'(5[\W_][01](?:ch)?)(?=[^\d]|$)', value='5.1', children=True) - rebulk.regex(r'(2[\W_]0(?:ch)?)(?=[^\d]|$)', value='2.0', children=True) + rebulk.defaults(clear=True, + name="audio_channels", + disabled=lambda context: is_disabled(context, 'audio_channels')) rebulk.regex('7[01]', value='7.1', validator=seps_after, tags='weak-audio_channels') rebulk.regex('5[01]', value='5.1', validator=seps_after, tags='weak-audio_channels') rebulk.string('20', value='2.0', validator=seps_after, tags='weak-audio_channels') - rebulk.string('7ch', '8ch', value='7.1') - rebulk.string('5ch', '6ch', value='5.1') - rebulk.string('2ch', 'stereo', value='2.0') - rebulk.string('1ch', 'mono', value='1.0') + + for value, items in config.get('audio_channels').items(): + for item in items: + if item.startswith('re:'): + 
rebulk.regex(item[3:], value=value, children=True) + else: + rebulk.string(item, value=value) rebulk.rules(DtsHDRule, DtsRule, AacRule, DolbyDigitalRule, AudioValidatorRule, HqConflictRule, AudioChannelsValidatorRule) diff --git a/libs/common/guessit/rules/properties/bit_rate.py b/libs/common/guessit/rules/properties/bit_rate.py index 391f1d2f..d279c9f1 100644 --- a/libs/common/guessit/rules/properties/bit_rate.py +++ b/libs/common/guessit/rules/properties/bit_rate.py @@ -69,4 +69,6 @@ class BitRateTypeRule(Rule): else: to_rename.append(match) - return to_rename, to_remove + if to_rename or to_remove: + return to_rename, to_remove + return False diff --git a/libs/common/guessit/rules/properties/bonus.py b/libs/common/guessit/rules/properties/bonus.py index c4554cd0..54087aa3 100644 --- a/libs/common/guessit/rules/properties/bonus.py +++ b/libs/common/guessit/rules/properties/bonus.py @@ -26,7 +26,8 @@ def bonus(config): # pylint:disable=unused-argument rebulk = rebulk.regex_defaults(flags=re.IGNORECASE) rebulk.regex(r'x(\d+)', name='bonus', private_parent=True, children=True, formatter=int, - validator={'__parent__': lambda match: seps_surround}, + validator={'__parent__': seps_surround}, + validate_all=True, conflict_solver=lambda match, conflicting: match if conflicting.name in ('video_codec', 'episode') and 'weak-episode' not in conflicting.tags else '__default__') diff --git a/libs/common/guessit/rules/properties/container.py b/libs/common/guessit/rules/properties/container.py index 77599509..0f1860af 100644 --- a/libs/common/guessit/rules/properties/container.py +++ b/libs/common/guessit/rules/properties/container.py @@ -44,7 +44,8 @@ def container(config): rebulk.regex(r'\.'+build_or_pattern(torrent)+'$', exts=torrent, tags=['extension', 'torrent']) rebulk.regex(r'\.'+build_or_pattern(nzb)+'$', exts=nzb, tags=['extension', 'nzb']) - rebulk.defaults(name='container', + rebulk.defaults(clear=True, + name='container', validator=seps_surround, formatter=lambda s: s.lower(), conflict_solver=lambda match, other: match diff --git a/libs/common/guessit/rules/properties/episode_title.py b/libs/common/guessit/rules/properties/episode_title.py index d429c3e7..ece8921d 100644 --- a/libs/common/guessit/rules/properties/episode_title.py +++ b/libs/common/guessit/rules/properties/episode_title.py @@ -10,6 +10,7 @@ from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, RenameMatch, POST_PRO from ..common import seps, title_seps from ..common.formatters import cleanup from ..common.pattern import is_disabled +from ..common.validators import or_ from ..properties.title import TitleFromPosition, TitleBaseRule from ..properties.type import TypeProcessor @@ -133,8 +134,7 @@ class EpisodeTitleFromPosition(TitleBaseRule): def hole_filter(self, hole, matches): episode = matches.previous(hole, - lambda previous: any(name in previous.names - for name in self.previous_names), + lambda previous: previous.named(*self.previous_names), 0) crc32 = matches.named('crc32') @@ -179,8 +179,7 @@ class AlternativeTitleReplace(Rule): predicate=lambda match: 'title' in match.tags, index=0) if main_title: episode = matches.previous(main_title, - lambda previous: any(name in previous.names - for name in self.previous_names), + lambda previous: previous.named(*self.previous_names), 0) crc32 = matches.named('crc32') @@ -249,7 +248,7 @@ class Filepart3EpisodeTitle(Rule): if season: hole = matches.holes(subdirectory.start, subdirectory.end, - ignore=lambda match: 'weak-episode' in match.tags, + ignore=or_(lambda match: 
'weak-episode' in match.tags, TitleBaseRule.is_ignored), formatter=cleanup, seps=title_seps, predicate=lambda match: match.value, index=0) if hole: @@ -292,7 +291,8 @@ class Filepart2EpisodeTitle(Rule): season = (matches.range(directory.start, directory.end, lambda match: match.name == 'season', 0) or matches.range(filename.start, filename.end, lambda match: match.name == 'season', 0)) if season: - hole = matches.holes(directory.start, directory.end, ignore=lambda match: 'weak-episode' in match.tags, + hole = matches.holes(directory.start, directory.end, + ignore=or_(lambda match: 'weak-episode' in match.tags, TitleBaseRule.is_ignored), formatter=cleanup, seps=title_seps, predicate=lambda match: match.value, index=0) if hole: diff --git a/libs/common/guessit/rules/properties/episodes.py b/libs/common/guessit/rules/properties/episodes.py index 97f060a2..345c785d 100644 --- a/libs/common/guessit/rules/properties/episodes.py +++ b/libs/common/guessit/rules/properties/episodes.py @@ -11,12 +11,13 @@ from rebulk.match import Match from rebulk.remodule import re from rebulk.utils import is_iterable +from guessit.rules import match_processors +from guessit.rules.common.numeral import parse_numeral, numeral from .title import TitleFromPosition from ..common import dash, alt_dash, seps, seps_no_fs from ..common.formatters import strip -from ..common.numeral import numeral, parse_numeral from ..common.pattern import is_disabled -from ..common.validators import compose, seps_surround, seps_before, int_coercable +from ..common.validators import seps_surround, int_coercable, and_ from ...reutils import build_or_pattern @@ -29,17 +30,12 @@ def episodes(config): :return: Created Rebulk object :rtype: Rebulk """ + # pylint: disable=too-many-branches,too-many-statements,too-many-locals def is_season_episode_disabled(context): """Whether season and episode rules should be enabled.""" return is_disabled(context, 'episode') or is_disabled(context, 'season') - rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True) - rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker']) - - episode_max_range = config['episode_max_range'] - season_max_range = config['season_max_range'] - def episodes_season_chain_breaker(matches): """ Break chains if there's more than 100 offset between two neighbor values. 
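The hunks above and below lean on the renamed validator combinators from rules/common/validators.py: `compose` is now `and_`, and the new `or_` accepts a value as soon as any wrapped validator accepts it. A minimal sketch of their semantics, standalone and with plain strings standing in for the rebulk Match objects the real validators receive:

    def and_(*validators):
        # Passes only when every wrapped validator passes.
        def composed(value):
            return all(validator(value) for validator in validators)
        return composed

    def or_(*validators):
        # Passes as soon as any wrapped validator passes.
        def composed(value):
            return any(validator(value) for validator in validators)
        return composed

    is_short = lambda s: len(s) < 5
    is_digits = lambda s: s.isdigit()

    assert and_(is_short, is_digits)('1984')     # short and numeric
    assert not and_(is_short, is_digits)('abc')  # short but not numeric
    assert or_(is_short, is_digits)('abc')       # one passing validator is enough

This is what `ignore=or_(lambda match: 'weak-episode' in match.tags, TitleBaseRule.is_ignored)` above and `validator={'__parent__': and_(seps_surround, ordering_validator)}` in the chains below rely on: a hole is ignored when either predicate matches, while a parent match must both be surrounded by separators and keep its season/episode values in natural order.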
@@ -57,8 +53,6 @@ def episodes(config): return True return False - rebulk.chain_defaults(chain_breaker=episodes_season_chain_breaker) - def season_episode_conflict_solver(match, other): """ Conflict solver for episode/season patterns @@ -76,7 +70,6 @@ def episodes(config): if (other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags and not match.initiator.children.named(match.name + 'Marker')) or ( other.name == 'screen_size' and not int_coercable(other.raw)): - return match if other.name in ('season', 'episode') and match.initiator != other.initiator: if (match.initiator.name in ('weak_episode', 'weak_duplicate') @@ -87,21 +80,6 @@ def episodes(config): return current return '__default__' - season_words = config['season_words'] - episode_words = config['episode_words'] - of_words = config['of_words'] - all_words = config['all_words'] - season_markers = config['season_markers'] - season_ep_markers = config['season_ep_markers'] - disc_markers = config['disc_markers'] - episode_markers = config['episode_markers'] - range_separators = config['range_separators'] - weak_discrete_separators = list(sep for sep in seps_no_fs if sep not in range_separators) - strong_discrete_separators = config['discrete_separators'] - discrete_separators = strong_discrete_separators + weak_discrete_separators - - max_range_gap = config['max_range_gap'] - def ordering_validator(match): """ Validator for season list. They should be in natural order to be validated. @@ -135,65 +113,18 @@ def episodes(config): lambda m: m.name == property_name + 'Separator') separator = match.children.previous(current_match, lambda m: m.name == property_name + 'Separator', 0) - if separator.raw not in range_separators and separator.raw in weak_discrete_separators: - if not 0 < current_match.value - previous_match.value <= max_range_gap + 1: - valid = False - if separator.raw in strong_discrete_separators: - valid = True - break + if separator: + if separator.raw not in range_separators and separator.raw in weak_discrete_separators: + if not 0 < current_match.value - previous_match.value <= max_range_gap + 1: + valid = False + if separator.raw in strong_discrete_separators: + valid = True + break previous_match = current_match return valid return is_consecutive('episode') and is_consecutive('season') - # S01E02, 01x02, S01S02S03 - rebulk.chain(formatter={'season': int, 'episode': int}, - tags=['SxxExx'], - abbreviations=[alt_dash], - children=True, - private_parent=True, - validate_all=True, - validator={'__parent__': ordering_validator}, - conflict_solver=season_episode_conflict_solver, - disabled=is_season_episode_disabled) \ - .regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)@?' + - build_or_pattern(episode_markers + disc_markers, name='episodeMarker') + r'@?(?P<episode>\d+)', - validate_all=True, - validator={'__parent__': seps_before}).repeater('+') \ - .regex(build_or_pattern(episode_markers + disc_markers + discrete_separators + range_separators, - name='episodeSeparator', - escape=True) + - r'(?P<episode>\d+)').repeater('*') \ - .chain() \ - .regex(r'(?P<season>\d+)@?' + - build_or_pattern(season_ep_markers, name='episodeMarker') + - r'@?(?P<episode>\d+)', - validate_all=True, - validator={'__parent__': seps_before}) \ - .chain() \ - .regex(r'(?P<season>\d+)@?' + - build_or_pattern(season_ep_markers, name='episodeMarker') + - r'@?(?P<episode>\d+)', - validate_all=True, - validator={'__parent__': seps_before}) \ - .regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators, - name='episodeSeparator', - escape=True) + - r'(?P<episode>\d+)').repeater('*') \ - .chain() \ - .regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)', - validate_all=True, - validator={'__parent__': seps_before}) \ - .regex(build_or_pattern(season_markers + discrete_separators + range_separators, - name='seasonSeparator', - escape=True) + - r'(?P<season>\d+)').repeater('*') - - # episode_details property - for episode_detail in ('Special', 'Pilot', 'Unaired', 'Final'): - rebulk.string(episode_detail, value=episode_detail, name='episode_details', - disabled=lambda context: is_disabled(context, 'episode_details')) - def validate_roman(match): """ Validate a roman match if surrounded by separators @@ -206,117 +137,203 @@ def episodes(config): return True return seps_surround(match) + season_words = config['season_words'] + episode_words = config['episode_words'] + of_words = config['of_words'] + all_words = config['all_words'] + season_markers = config['season_markers'] + season_ep_markers = config['season_ep_markers'] + disc_markers = config['disc_markers'] + episode_markers = config['episode_markers'] + range_separators = config['range_separators'] + weak_discrete_separators = list(sep for sep in seps_no_fs if sep not in range_separators) + strong_discrete_separators = config['discrete_separators'] + discrete_separators = strong_discrete_separators + weak_discrete_separators + episode_max_range = config['episode_max_range'] + season_max_range = config['season_max_range'] + max_range_gap = config['max_range_gap'] + + rebulk = Rebulk() \ + .regex_defaults(flags=re.IGNORECASE) \ + .string_defaults(ignore_case=True) \ + .chain_defaults(chain_breaker=episodes_season_chain_breaker) \ + .defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'], + formatter={'season': int, 'episode': int, 'version': int, 'count': int}, + children=True, + private_parent=True, + conflict_solver=season_episode_conflict_solver, + abbreviations=[alt_dash]) + + # S01E02, 01x02, S01S02S03 + rebulk.chain( + tags=['SxxExx'], + validate_all=True, + validator={'__parent__': and_(seps_surround, ordering_validator)}, + disabled=is_season_episode_disabled) \ + .defaults(tags=['SxxExx']) \ + .regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)@?' + + build_or_pattern(episode_markers + disc_markers, name='episodeMarker') + r'@?(?P<episode>\d+)')\ + .repeater('+') \ + .regex(build_or_pattern(episode_markers + disc_markers + discrete_separators + range_separators, + name='episodeSeparator', + escape=True) + + r'(?P<episode>\d+)').repeater('*') + + rebulk.chain(tags=['SxxExx'], + validate_all=True, + validator={'__parent__': and_(seps_surround, ordering_validator)}, + disabled=is_season_episode_disabled) \ + .defaults(tags=['SxxExx']) \ + .regex(r'(?P<season>\d+)@?' + + build_or_pattern(season_ep_markers, name='episodeMarker') + + r'@?(?P<episode>\d+)').repeater('+') \ + + rebulk.chain(tags=['SxxExx'], + validate_all=True, + validator={'__parent__': and_(seps_surround, ordering_validator)}, + disabled=is_season_episode_disabled) \ + .defaults(tags=['SxxExx']) \ + .regex(r'(?P<season>\d+)@?' + + build_or_pattern(season_ep_markers, name='episodeMarker') + + r'@?(?P<episode>\d+)') \ + .regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators, + name='episodeSeparator', + escape=True) + + r'(?P<episode>\d+)').repeater('*') + + rebulk.chain(tags=['SxxExx'], + validate_all=True, + validator={'__parent__': and_(seps_surround, ordering_validator)}, + disabled=is_season_episode_disabled) \ + .defaults(tags=['SxxExx']) \ + .regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)') \ + .regex('(?P<other>Extras)', name='other', value='Extras', tags=['no-release-group-prefix']).repeater('?') \ + .regex(build_or_pattern(season_markers + discrete_separators + range_separators, + name='seasonSeparator', + escape=True) + + r'(?P<season>\d+)').repeater('*') + + # episode_details property + for episode_detail in ('Special', 'Pilot', 'Unaired', 'Final'): + rebulk.string(episode_detail, + private_parent=False, + children=False, + value=episode_detail, + name='episode_details', + disabled=lambda context: is_disabled(context, 'episode_details')) + rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'], - validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True, + validate_all=True, + validator={'__parent__': and_(seps_surround, ordering_validator)}, + children=True, + private_parent=True, conflict_solver=season_episode_conflict_solver) - rebulk.chain(abbreviations=[alt_dash], + rebulk.chain(validate_all=True, + conflict_solver=season_episode_conflict_solver, formatter={'season': parse_numeral, 'count': parse_numeral}, - validator={'__parent__': compose(seps_surround, ordering_validator), + validator={'__parent__': and_(seps_surround, ordering_validator), 'season': validate_roman, 'count': validate_roman}, disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'season')) \ - .defaults(validator=None) \ + .defaults(formatter={'season': parse_numeral, 'count': parse_numeral}, + validator={'season': validate_roman, 'count': validate_roman}, + conflict_solver=season_episode_conflict_solver) \ .regex(build_or_pattern(season_words, name='seasonMarker') + '@?(?P<season>' + numeral + ')') \ .regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \ .regex(r'@?' + build_or_pattern(range_separators + discrete_separators + ['@'], name='seasonSeparator', escape=True) + r'@?(?P<season>\d+)').repeater('*') + rebulk.defaults(abbreviations=[dash]) + rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>\d+)' + r'(?:v(?P<version>\d+))?' + r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?', # Episode 4 - abbreviations=[dash], formatter={'episode': int, 'version': int, 'count': int}, disabled=lambda context: context.get('type') == 'episode' or is_disabled(context, 'episode')) rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>' + numeral + ')' + r'(?:v(?P<version>\d+))?' + r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?', # Episode 4 - abbreviations=[dash], validator={'episode': validate_roman}, - formatter={'episode': parse_numeral, 'version': int, 'count': int}, + formatter={'episode': parse_numeral}, disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode')) rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')', tags=['SxxExx'], - abbreviations=[dash], - validator=None, - formatter={'season': int, 'other': lambda match: 'Complete'}, + formatter={'other': lambda match: 'Complete'}, disabled=lambda context: is_disabled(context, 'season')) # 12, 13 - rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int}, + rebulk.chain(tags=['weak-episode'], disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \ - .defaults(validator=None) \ + .defaults(validator=None, tags=['weak-episode']) \ .regex(r'(?P<episode>\d{2})') \ .regex(r'v(?P<version>\d+)').repeater('?') \ - .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*') + .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})', abbreviations=None).repeater('*') # 012, 013 - rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int}, + rebulk.chain(tags=['weak-episode'], disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \ - .defaults(validator=None) \ + .defaults(validator=None, tags=['weak-episode']) \ .regex(r'0(?P<episode>\d{1,2})') \ .regex(r'v(?P<version>\d+)').repeater('?') \ - .regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})').repeater('*') + .regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})', abbreviations=None).repeater('*') # 112, 113 rebulk.chain(tags=['weak-episode'], - formatter={'episode': int, 'version': int}, name='weak_episode', disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \ - .defaults(validator=None) \ + .defaults(validator=None, tags=['weak-episode'], name='weak_episode') \ .regex(r'(?P<episode>\d{3,4})') \ .regex(r'v(?P<version>\d+)').repeater('?') \ - .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})').repeater('*') + .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})', abbreviations=None).repeater('*') # 1, 2, 3 - rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int}, + rebulk.chain(tags=['weak-episode'], disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode')) \ - .defaults(validator=None) \ + .defaults(validator=None, tags=['weak-episode']) \ .regex(r'(?P<episode>\d)') \ .regex(r'v(?P<version>\d+)').repeater('?') \ - .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})').repeater('*') + .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})', abbreviations=None).repeater('*') # e112, e113, 1e18, 3e19 - # TODO: Enhance rebulk for validator to be used globally (season_episode_validator) - rebulk.chain(formatter={'season': int, 'episode': int, 'version': int}, - disabled=lambda context: is_disabled(context, 'episode')) \ + rebulk.chain(disabled=lambda context: is_disabled(context, 'episode')) \ .defaults(validator=None) \ .regex(r'(?P<season>\d{1,2})?(?P<episodeMarker>e)(?P<episode>\d{1,4})') \ .regex(r'v(?P<version>\d+)').repeater('?') \ - .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*') + .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})', abbreviations=None).repeater('*') # ep 112, ep113, ep112, ep113 - rebulk.chain(abbreviations=[dash], formatter={'episode': int, 'version': int}, - disabled=lambda context: is_disabled(context, 'episode')) \ + rebulk.chain(disabled=lambda context: is_disabled(context, 'episode')) \ .defaults(validator=None) \ .regex(r'ep-?(?P<episode>\d{1,4})') \ .regex(r'v(?P<version>\d+)').repeater('?') \ - .regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})').repeater('*') + .regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})', abbreviations=None).repeater('*') # cap 112, cap 112_114 - rebulk.chain(abbreviations=[dash], - tags=['see-pattern'], - formatter={'season': int, 'episode': int}, + rebulk.chain(tags=['see-pattern'], disabled=is_season_episode_disabled) \ - .defaults(validator=None) \ + .defaults(validator=None, tags=['see-pattern']) \ .regex(r'(?P<seasonMarker>cap)-?(?P<season>\d{1,2})(?P<episode>\d{2})') \ .regex(r'(?P<seasonSeparator>-)(?P<season>\d{1,2})(?P<episode>\d{2})').repeater('?') # 102, 0102 rebulk.chain(tags=['weak-episode', 'weak-duplicate'], - formatter={'season': int, 'episode': int, 'version': int}, name='weak_duplicate', conflict_solver=season_episode_conflict_solver, disabled=lambda context: (context.get('episode_prefer_number', False) or context.get('type') == 'movie') or is_season_episode_disabled(context)) \ - .defaults(validator=None) \ + .defaults(tags=['weak-episode', 'weak-duplicate'], + name='weak_duplicate', + validator=None, + conflict_solver=season_episode_conflict_solver) \ .regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \ .regex(r'v(?P<version>\d+)').repeater('?') \ - .regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})').repeater('*') - rebulk.regex(r'v(?P<version>\d+)', children=True, private_parent=True, formatter=int, + rebulk.regex(r'v(?P<version>\d+)', + formatter=int, disabled=lambda context: is_disabled(context, 'version')) rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator']) @@ -325,18 +342,23 @@ def episodes(config): # detached of X count (season/episode) rebulk.regex(r'(?P<episode>\d+)-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?', - abbreviations=[dash], children=True, private_parent=True, formatter=int, + formatter=int, + pre_match_processor=match_processors.strip, disabled=lambda context: is_disabled(context, 'episode')) - rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode", + rebulk.regex(r'Minisodes?', + children=False, + private_parent=False, + name='episode_format', + value="Minisode", disabled=lambda context: is_disabled(context, 'episode_format')) rebulk.rules(WeakConflictSolver, RemoveInvalidSeason, RemoveInvalidEpisode, SeePatternRange(range_separators + ['_']), EpisodeNumberSeparatorRange(range_separators), - SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx, - RemoveWeakDuplicate, EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator, - RemoveWeak, RenameToAbsoluteEpisode, CountValidator, EpisodeSingleDigitValidator, RenameToDiscMatch) + SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx, RemoveWeakDuplicate, + EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator, RemoveWeak(episode_words), + RenameToAbsoluteEpisode, CountValidator, EpisodeSingleDigitValidator, RenameToDiscMatch) return rebulk @@ -416,7 +438,9 @@ class WeakConflictSolver(Rule): if to_append: to_remove.extend(weak_dup_matches) - return to_remove, to_append + if to_remove or to_append: + return to_remove, to_append + return False class CountValidator(Rule): @@ -442,7 +466,9 @@ class CountValidator(Rule): season_count.append(count) else: to_remove.append(count) - return to_remove, episode_count, season_count + if to_remove or episode_count or season_count: + return to_remove, episode_count, season_count + return False class SeePatternRange(Rule): @@ -477,7 +503,9 @@ class SeePatternRange(Rule): to_remove.append(separator) - return to_remove, to_append + if to_remove or to_append: + return to_remove, to_append + return False class AbstractSeparatorRange(Rule): @@ -533,7 +561,9 @@ class 
AbstractSeparatorRange(Rule): previous_match = next_match - return to_remove, to_append + if to_remove or to_append: + return to_remove, to_append + return False class RenameToAbsoluteEpisode(Rule): @@ -629,20 +659,41 @@ class RemoveWeak(Rule): Remove weak-episode matches which appears after video, source, and audio matches. """ priority = 16 - consequence = RemoveMatch + consequence = RemoveMatch, AppendMatch + + def __init__(self, episode_words): + super(RemoveWeak, self).__init__() + self.episode_words = episode_words def when(self, matches, context): to_remove = [] + to_append = [] for filepart in matches.markers.named('path'): weaks = matches.range(filepart.start, filepart.end, predicate=lambda m: 'weak-episode' in m.tags) if weaks: - previous = matches.previous(weaks[0], predicate=lambda m: m.name in ( + weak = weaks[0] + previous = matches.previous(weak, predicate=lambda m: m.name in ( 'audio_codec', 'screen_size', 'streaming_service', 'source', 'video_profile', 'audio_channels', 'audio_profile'), index=0) if previous and not matches.holes( - previous.end, weaks[0].start, predicate=lambda m: m.raw.strip(seps)): + previous.end, weak.start, predicate=lambda m: m.raw.strip(seps)): + if previous.raw.lower() in self.episode_words: + try: + episode = copy.copy(weak) + episode.name = 'episode' + episode.value = int(weak.value) + episode.start = previous.start + episode.private = False + episode.tags = [] + + to_append.append(episode) + except ValueError: + pass + to_remove.extend(weaks) - return to_remove + if to_remove or to_append: + return to_remove, to_append + return False class RemoveWeakIfSxxExx(Rule): @@ -856,4 +907,6 @@ class RenameToDiscMatch(Rule): markers.append(marker) discs.extend(sorted(marker.initiator.children.named('episode'), key=lambda m: m.value)) - return discs, markers, to_remove + if discs or markers or to_remove: + return discs, markers, to_remove + return False diff --git a/libs/common/guessit/rules/properties/language.py b/libs/common/guessit/rules/properties/language.py index bcdbda8b..3f83bc34 100644 --- a/libs/common/guessit/rules/properties/language.py +++ b/libs/common/guessit/rules/properties/language.py @@ -72,6 +72,8 @@ def language(config, common_words): UNDETERMINED = babelfish.Language('und') +MULTIPLE = babelfish.Language('mul') +NON_SPECIFIC_LANGUAGES = frozenset([UNDETERMINED, MULTIPLE]) class GuessitConverter(babelfish.LanguageReverseConverter): # pylint: disable=missing-docstring @@ -388,7 +390,9 @@ class SubtitlePrefixLanguageRule(Rule): to_remove.extend(matches.conflicting(lang)) if prefix in to_remove: to_remove.remove(prefix) - return to_rename, to_remove + if to_rename or to_remove: + return to_rename, to_remove + return False def then(self, matches, when_response, context): to_rename, to_remove = when_response @@ -425,7 +429,9 @@ class SubtitleSuffixLanguageRule(Rule): to_append.append(lang) if suffix in to_remove: to_remove.remove(suffix) - return to_append, to_remove + if to_append or to_remove: + return to_append, to_remove + return False def then(self, matches, when_response, context): to_rename, to_remove = when_response @@ -478,6 +484,7 @@ class RemoveInvalidLanguages(Rule): """Remove language matches that matches the blacklisted common words.""" consequence = RemoveMatch + priority = 32 def __init__(self, common_words): """Constructor.""" diff --git a/libs/common/guessit/rules/properties/other.py b/libs/common/guessit/rules/properties/other.py index 330caa92..c7dc9a88 100644 --- a/libs/common/guessit/rules/properties/other.py +++ 
b/libs/common/guessit/rules/properties/other.py @@ -11,7 +11,7 @@ from rebulk.remodule import re from ..common import dash from ..common import seps from ..common.pattern import is_disabled -from ..common.validators import seps_after, seps_before, seps_surround, compose +from ..common.validators import seps_after, seps_before, seps_surround, and_ from ...reutils import build_or_pattern from ...rules.common.formatters import raw_cleanup @@ -35,11 +35,16 @@ def other(config): # pylint:disable=unused-argument,too-many-statements rebulk.regex('ws', 'wide-?screen', value='Widescreen') rebulk.regex('Re-?Enc(?:oded)?', value='Reencoded') - rebulk.string('Proper', 'Repack', 'Rerip', value='Proper', + rebulk.string('Repack', 'Rerip', value='Proper', tags=['streaming_service.prefix', 'streaming_service.suffix']) + rebulk.string('Proper', value='Proper', + tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix']) rebulk.regex('Real-Proper', 'Real-Repack', 'Real-Rerip', value='Proper', tags=['streaming_service.prefix', 'streaming_service.suffix', 'real']) + rebulk.regex('Real', value='Proper', + tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix', 'real']) + rebulk.string('Fix', 'Fixed', value='Fix', tags=['has-neighbor-before', 'has-neighbor-after', 'streaming_service.prefix', 'streaming_service.suffix']) rebulk.string('Dirfix', 'Nfofix', 'Prooffix', value='Fix', @@ -72,16 +77,18 @@ def other(config): # pylint:disable=unused-argument,too-many-statements private_names=['completeArticle', 'completeWordsBefore', 'completeWordsAfter'], value={'other': 'Complete'}, tags=['release-group-prefix'], - validator={'__parent__': compose(seps_surround, validate_complete)}) + validator={'__parent__': and_(seps_surround, validate_complete)}) rebulk.string('R5', value='Region 5') rebulk.string('RC', value='Region C') rebulk.regex('Pre-?Air', value='Preair') - rebulk.regex('(?:PS-?)?Vita', value='PS Vita') + rebulk.regex('(?:PS-?)Vita', value='PS Vita') + rebulk.regex('Vita', value='PS Vita', tags='has-neighbor') rebulk.regex('(HD)(?P<another>Rip)', value={'other': 'HD', 'another': 'Rip'}, private_parent=True, children=True, validator={'__parent__': seps_surround}, validate_all=True) - for value in ('Screener', 'Remux', '3D', 'PAL', 'SECAM', 'NTSC', 'XXX'): + for value in ('Screener', 'Remux', 'PAL', 'SECAM', 'NTSC', 'XXX'): rebulk.string(value, value=value) + rebulk.string('3D', value='3D', tags='has-neighbor') rebulk.string('HQ', value='High Quality', tags='uhdbluray-neighbor') rebulk.string('HR', value='High Resolution') @@ -90,6 +97,7 @@ def other(config): # pylint:disable=unused-argument,too-many-statements rebulk.string('mHD', 'HDLight', value='Micro HD') rebulk.string('LDTV', value='Low Definition') rebulk.string('HFR', value='High Frame Rate') + rebulk.string('VFR', value='Variable Frame Rate') rebulk.string('HD', value='HD', validator=None, tags=['streaming_service.prefix', 'streaming_service.suffix']) rebulk.regex('Full-?HD', 'FHD', value='Full HD', validator=None, @@ -128,13 +136,15 @@ def other(config): # pylint:disable=unused-argument,too-many-statements rebulk.regex('BT-?2020', value='BT.2020', tags='uhdbluray-neighbor') rebulk.string('Sample', value='Sample', tags=['at-end', 'not-a-release-group']) + rebulk.string('Extras', value='Extras', tags='has-neighbor') + rebulk.regex('Digital-?Extras?', value='Extras') rebulk.string('Proof', value='Proof', tags=['at-end', 'not-a-release-group']) rebulk.string('Obfuscated', 'Scrambled', value='Obfuscated', tags=['at-end', 'not-a-release-group']) rebulk.string('xpost', 'postbot', 'asrequested', value='Repost', tags='not-a-release-group') rebulk.rules(RenameAnotherToOther, ValidateHasNeighbor, ValidateHasNeighborAfter, ValidateHasNeighborBefore, ValidateScreenerRule, ValidateMuxRule, ValidateHardcodedSubs, ValidateStreamingServiceNeighbor, - ValidateAtEnd, ProperCountRule) + ValidateAtEnd, ValidateReal, ProperCountRule) return rebulk @@ -354,3 +364,20 @@ class ValidateAtEnd(Rule): to_remove.append(match) return to_remove + + +class ValidateReal(Rule): + """ + Validate Real + """ + consequence = RemoveMatch + priority = 64 + + def when(self, matches, context): + ret = [] + for filepart in matches.markers.named('path'): + for match in matches.range(filepart.start, filepart.end, lambda m: m.name == 'other' and 'real' in m.tags): + if not matches.range(filepart.start, match.start): + ret.append(match) + + return ret diff --git a/libs/common/guessit/rules/properties/part.py b/libs/common/guessit/rules/properties/part.py index ec038b18..c1123394 100644 --- a/libs/common/guessit/rules/properties/part.py +++ b/libs/common/guessit/rules/properties/part.py @@ -8,7 +8,7 @@ from rebulk.remodule import re from rebulk import Rebulk from ..common import dash from ..common.pattern import is_disabled -from ..common.validators import seps_surround, int_coercable, compose +from ..common.validators import seps_surround, int_coercable, and_ from ..common.numeral import numeral, parse_numeral from ...reutils import build_or_pattern @@ -41,6 +41,6 @@ def part(config): # pylint:disable=unused-argument rebulk.regex(build_or_pattern(prefixes) + r'-?(?P<part>' + numeral + r')', prefixes=prefixes, validate_all=True, private_parent=True, children=True, formatter=parse_numeral, - validator={'part': compose(validate_roman, lambda m: 0 < m.value < 100)}) + validator={'part': and_(validate_roman, lambda m: 0 < m.value < 100)}) return rebulk diff --git a/libs/common/guessit/rules/properties/release_group.py b/libs/common/guessit/rules/properties/release_group.py index ff1ac660..ecff808b 100644 --- a/libs/common/guessit/rules/properties/release_group.py +++ b/libs/common/guessit/rules/properties/release_group.py @@ -9,8 +9,8 @@ from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch from rebulk.match import Match from ..common import seps -from ..common.expected import build_expected_function from ..common.comparators import marker_sorted +from ..common.expected import build_expected_function from ..common.formatters import cleanup from ..common.pattern import is_disabled from ..common.validators import int_coercable, seps_surround @@ -50,7 +50,7 @@ def release_group(config): if string.lower().endswith(forbidden) and string[-len(forbidden) - 1:-len(forbidden)] in seps: string = string[:len(forbidden)] string = string.strip(groupname_seps) - return string + return string.strip() rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'release_group')) @@ -72,7 +72,9 @@ _scene_previous_names = ('video_codec', 'source', 'video_api', 'audio_codec', 'a 'audio_channels', 'screen_size', 'other', 'container', 'language', 'subtitle_language', 'subtitle_language.suffix', 'subtitle_language.prefix', 'language.suffix') -_scene_previous_tags = ('release-group-prefix', ) +_scene_previous_tags = ('release-group-prefix',) + +_scene_no_previous_tags = ('no-release-group-prefix',) class DashSeparatedReleaseGroup(Rule): @@ -193,7 +195,8 @@ class DashSeparatedReleaseGroup(Rule): if releasegroup.value: to_append.append(releasegroup) - return to_remove, to_append
+ if to_remove or to_append: + return to_remove, to_append class SceneReleaseGroup(Rule): @@ -212,6 +215,17 @@ class SceneReleaseGroup(Rule): super(SceneReleaseGroup, self).__init__() self.value_formatter = value_formatter + @staticmethod + def is_previous_match(match): + """ + Check if match can precede release_group + + :param match: + :return: + """ + return not match.tagged(*_scene_no_previous_tags) if match.name in _scene_previous_names else \ + match.tagged(*_scene_previous_tags) + def when(self, matches, context): # pylint:disable=too-many-locals # If a release_group is found before, ignore this kind of release_group rule. @@ -253,13 +267,12 @@ class SceneReleaseGroup(Rule): if match.start < filepart.start: return False - return not match.private or match.name in _scene_previous_names + return not match.private or self.is_previous_match(match) previous_match = matches.previous(last_hole, previous_match_filter, index=0) - if previous_match and (previous_match.name in _scene_previous_names or - any(tag in previous_match.tags for tag in _scene_previous_tags)) and \ + if previous_match and (self.is_previous_match(previous_match)) and \ not matches.input_string[previous_match.end:last_hole.start].strip(seps) \ and not int_coercable(last_hole.value.strip(seps)): @@ -300,11 +313,11 @@ class AnimeReleaseGroup(Rule): # If a release_group is found before, ignore this kind of release_group rule. if matches.named('release_group'): - return to_remove, to_append + return False if not matches.named('episode') and not matches.named('season') and matches.named('release_group'): # This doesn't seems to be an anime, and we already found another release_group. - return to_remove, to_append + return False for filepart in marker_sorted(matches.markers.named('path'), matches): @@ -328,4 +341,7 @@ class AnimeReleaseGroup(Rule): to_append.append(group) to_remove.extend(matches.range(empty_group.start, empty_group.end, lambda m: 'weak-language' in m.tags)) - return to_remove, to_append + + if to_remove or to_append: + return to_remove, to_append + return False diff --git a/libs/common/guessit/rules/properties/screen_size.py b/libs/common/guessit/rules/properties/screen_size.py index 83a797c1..77d5d052 100644 --- a/libs/common/guessit/rules/properties/screen_size.py +++ b/libs/common/guessit/rules/properties/screen_size.py @@ -24,8 +24,8 @@ def screen_size(config): :return: Created Rebulk object :rtype: Rebulk """ - interlaced = frozenset({res for res in config['interlaced']}) - progressive = frozenset({res for res in config['progressive']}) + interlaced = frozenset(config['interlaced']) + progressive = frozenset(config['progressive']) frame_rates = [re.escape(rate) for rate in config['frame_rates']] min_ar = config['min_ar'] max_ar = config['max_ar'] diff --git a/libs/common/guessit/rules/properties/source.py b/libs/common/guessit/rules/properties/source.py index ae9a7b03..2fe55618 100644 --- a/libs/common/guessit/rules/properties/source.py +++ b/libs/common/guessit/rules/properties/source.py @@ -12,7 +12,7 @@ from rebulk import AppendMatch, Rebulk, RemoveMatch, Rule from .audio_codec import HqConflictRule from ..common import dash, seps from ..common.pattern import is_disabled -from ..common.validators import seps_before, seps_after +from ..common.validators import seps_before, seps_after, or_ def source(config): # pylint:disable=unused-argument @@ -26,7 +26,10 @@ def source(config): # pylint:disable=unused-argument """ rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'source')) rebulk = 
rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash], private_parent=True, children=True) - rebulk.defaults(name='source', tags=['video-codec-prefix', 'streaming_service.suffix']) + rebulk = rebulk.defaults(name='source', + tags=['video-codec-prefix', 'streaming_service.suffix'], + validate_all=True, + validator={'__parent__': or_(seps_before, seps_after)}) rip_prefix = '(?P<other>Rip)-?' rip_suffix = '-?(?P<other>Rip)' @@ -42,7 +45,7 @@ def source(config): # pylint:disable=unused-argument def demote_other(match, other): # pylint: disable=unused-argument """Default conflict solver with 'other' property.""" - return other if other.name == 'other' else '__default__' + return other if other.name == 'other' or other.name == 'release_group' else '__default__' rebulk.regex(*build_source_pattern('VHS', suffix=rip_optional_suffix), value={'source': 'VHS', 'other': 'Rip'}) @@ -92,8 +95,9 @@ def source(config): # pylint:disable=unused-argument # WEBCap is a synonym to WEBRip, mostly used by non english rebulk.regex(*build_source_pattern('WEB-?(?P<another>Cap)', suffix=rip_optional_suffix), value={'source': 'Web', 'other': 'Rip', 'another': 'Rip'}) - rebulk.regex(*build_source_pattern('WEB-?DL', 'WEB-?U?HD', 'WEB', 'DL-?WEB', 'DL(?=-?Mux)'), + rebulk.regex(*build_source_pattern('WEB-?DL', 'WEB-?U?HD', 'DL-?WEB', 'DL(?=-?Mux)'), value={'source': 'Web'}) + rebulk.regex('(WEB)', value='Web', tags='weak.source') rebulk.regex(*build_source_pattern('HD-?DVD', suffix=rip_optional_suffix), value={'source': 'HD-DVD', 'other': 'Rip'}) @@ -118,7 +122,7 @@ def source(config): # pylint:disable=unused-argument rebulk.regex(*build_source_pattern('DSR?', 'SAT', suffix=rip_suffix), value={'source': 'Satellite', 'other': 'Rip'}) - rebulk.rules(ValidateSource, UltraHdBlurayRule) + rebulk.rules(ValidateSourcePrefixSuffix, ValidateWeakSource, UltraHdBlurayRule) return rebulk @@ -170,32 +174,62 @@ class UltraHdBlurayRule(Rule): to_remove.append(match) to_append.append(new_source) - return to_remove, to_append + if to_remove or to_append: + return to_remove, to_append + return False -class ValidateSource(Rule): +class ValidateSourcePrefixSuffix(Rule): """ - Validate source with screener property, with video_codec property or separated + Validate source matches using the source prefix and source suffix tags. """ priority = 64 consequence = RemoveMatch def when(self, matches, context): ret = [] - for match in matches.named('source'): - match = match.initiator - if not seps_before(match) and \ - not matches.range(match.start - 1, match.start - 2, - lambda m: 'source-prefix' in m.tags): - if match.children: - ret.extend(match.children) - ret.append(match) - continue - if not seps_after(match) and \ - not matches.range(match.end, match.end + 1, - lambda m: 'source-suffix' in m.tags): - if match.children: - ret.extend(match.children) - ret.append(match) - continue + for filepart in matches.markers.named('path'): + for match in matches.range(filepart.start, filepart.end, predicate=lambda m: m.name == 'source'): + match = match.initiator + if not seps_before(match) and \ + not matches.range(match.start - 1, match.start - 2, + lambda m: 'source-prefix' in m.tags): + if match.children: + ret.extend(match.children) + ret.append(match) + continue + if not seps_after(match) and \ + not matches.range(match.end, match.end + 1, + lambda m: 'source-suffix' in m.tags): + if match.children: + ret.extend(match.children) + ret.append(match) + continue + + return ret + + +class ValidateWeakSource(Rule): + """ + Validate weak source matches. + """ + dependency = [ValidateSourcePrefixSuffix] + priority = 64 + consequence = RemoveMatch + + def when(self, matches, context): + ret = [] + for filepart in matches.markers.named('path'): + for match in matches.range(filepart.start, filepart.end, predicate=lambda m: m.name == 'source'): + # if there is more than one source in this filepart, just before the year and with holes for the title, + # the source is most likely part of the title + if 'weak.source' in match.tags \ + and matches.range(match.end, filepart.end, predicate=lambda m: m.name == 'source') \ + and matches.holes(filepart.start, match.start, + predicate=lambda m: m.value.strip(seps), index=-1): + if match.children: + ret.extend(match.children) + ret.append(match) + continue + return ret diff --git a/libs/common/guessit/rules/properties/streaming_service.py b/libs/common/guessit/rules/properties/streaming_service.py index 1302befb..f467f20a 100644 --- a/libs/common/guessit/rules/properties/streaming_service.py +++ b/libs/common/guessit/rules/properties/streaming_service.py @@ -25,133 +25,13 @@ def streaming_service(config): # pylint: disable=too-many-statements,unused-arg rebulk = rebulk.string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]) rebulk.defaults(name='streaming_service', tags=['source-prefix']) - rebulk.string('AE', 'A&E', value='A&E') - rebulk.string('AMBC', value='ABC') - rebulk.string('AUBC', value='ABC Australia') - rebulk.string('AJAZ', value='Al Jazeera English') - rebulk.string('AMC', value='AMC') - rebulk.string('AMZN', 'Amazon', value='Amazon Prime') - rebulk.regex('Amazon-?Prime', value='Amazon Prime') - rebulk.string('AS', value='Adult Swim') - rebulk.regex('Adult-?Swim', value='Adult Swim') - rebulk.string('ATK', value="America's Test Kitchen") - rebulk.string('ANPL', value='Animal Planet') - rebulk.string('ANLB', value='AnimeLab') - rebulk.string('AOL', value='AOL') - rebulk.string('ARD', value='ARD') - rebulk.string('iP', value='BBC iPlayer') - rebulk.regex('BBC-?iPlayer', value='BBC iPlayer') - rebulk.string('BRAV', value='BravoTV') - rebulk.string('CNLP', value='Canal+') - rebulk.string('CN', value='Cartoon Network') - rebulk.string('CBC', value='CBC') - rebulk.string('CBS', value='CBS') - rebulk.string('CNBC', value='CNBC') - rebulk.string('CC', 
value='Comedy Central') - rebulk.string('4OD', value='Channel 4') - rebulk.string('CHGD', value='CHRGD') - rebulk.string('CMAX', value='Cinemax') - rebulk.string('CMT', value='Country Music Television') - rebulk.regex('Comedy-?Central', value='Comedy Central') - rebulk.string('CCGC', value='Comedians in Cars Getting Coffee') - rebulk.string('CR', value='Crunchy Roll') - rebulk.string('CRKL', value='Crackle') - rebulk.regex('Crunchy-?Roll', value='Crunchy Roll') - rebulk.string('CSPN', value='CSpan') - rebulk.string('CTV', value='CTV') - rebulk.string('CUR', value='CuriosityStream') - rebulk.string('CWS', value='CWSeed') - rebulk.string('DSKI', value='Daisuki') - rebulk.string('DHF', value='Deadhouse Films') - rebulk.string('DDY', value='Digiturk Diledigin Yerde') - rebulk.string('DISC', 'Discovery', value='Discovery') - rebulk.string('DSNY', 'Disney', value='Disney') - rebulk.string('DIY', value='DIY Network') - rebulk.string('DOCC', value='Doc Club') - rebulk.string('DPLY', value='DPlay') - rebulk.string('ETV', value='E!') - rebulk.string('EPIX', value='ePix') - rebulk.string('ETTV', value='El Trece') - rebulk.string('ESPN', value='ESPN') - rebulk.string('ESQ', value='Esquire') - rebulk.string('FAM', value='Family') - rebulk.string('FJR', value='Family Jr') - rebulk.string('FOOD', value='Food Network') - rebulk.string('FOX', value='Fox') - rebulk.string('FREE', value='Freeform') - rebulk.string('FYI', value='FYI Network') - rebulk.string('GLBL', value='Global') - rebulk.string('GLOB', value='GloboSat Play') - rebulk.string('HLMK', value='Hallmark') - rebulk.string('HBO', value='HBO Go') - rebulk.regex('HBO-?Go', value='HBO Go') - rebulk.string('HGTV', value='HGTV') - rebulk.string('HIST', 'History', value='History') - rebulk.string('HULU', value='Hulu') - rebulk.string('ID', value='Investigation Discovery') - rebulk.string('IFC', value='IFC') - rebulk.string('iTunes', 'iT', value='iTunes') - rebulk.string('ITV', value='ITV') - rebulk.string('KNOW', value='Knowledge Network') - rebulk.string('LIFE', value='Lifetime') - rebulk.string('MTOD', value='Motor Trend OnDemand') - rebulk.string('MNBC', value='MSNBC') - rebulk.string('MTV', value='MTV') - rebulk.string('NATG', value='National Geographic') - rebulk.regex('National-?Geographic', value='National Geographic') - rebulk.string('NBA', value='NBA TV') - rebulk.regex('NBA-?TV', value='NBA TV') - rebulk.string('NBC', value='NBC') - rebulk.string('NF', 'Netflix', value='Netflix') - rebulk.string('NFL', value='NFL') - rebulk.string('NFLN', value='NFL Now') - rebulk.string('GC', value='NHL GameCenter') - rebulk.string('NICK', 'Nickelodeon', value='Nickelodeon') - rebulk.string('NRK', value='Norsk Rikskringkasting') - rebulk.string('PBS', value='PBS') - rebulk.string('PBSK', value='PBS Kids') - rebulk.string('PSN', value='Playstation Network') - rebulk.string('PLUZ', value='Pluzz') - rebulk.string('RTE', value='RTE One') - rebulk.string('SBS', value='SBS (AU)') - rebulk.string('SESO', 'SeeSo', value='SeeSo') - rebulk.string('SHMI', value='Shomi') - rebulk.string('SPIK', value='Spike') - rebulk.string('SPKE', value='Spike TV') - rebulk.regex('Spike-?TV', value='Spike TV') - rebulk.string('SNET', value='Sportsnet') - rebulk.string('SPRT', value='Sprout') - rebulk.string('STAN', value='Stan') - rebulk.string('STZ', value='Starz') - rebulk.string('SVT', value='Sveriges Television') - rebulk.string('SWER', value='SwearNet') - rebulk.string('SYFY', value='Syfy') - rebulk.string('TBS', value='TBS') - rebulk.string('TFOU', value='TFou') - 
rebulk.string('CW', value='The CW') - rebulk.regex('The-?CW', value='The CW') - rebulk.string('TLC', value='TLC') - rebulk.string('TUBI', value='TubiTV') - rebulk.string('TV3', value='TV3 Ireland') - rebulk.string('TV4', value='TV4 Sweeden') - rebulk.string('TVL', value='TV Land') - rebulk.regex('TV-?Land', value='TV Land') - rebulk.string('UFC', value='UFC') - rebulk.string('UKTV', value='UKTV') - rebulk.string('UNIV', value='Univision') - rebulk.string('USAN', value='USA Network') - rebulk.string('VLCT', value='Velocity') - rebulk.string('VH1', value='VH1') - rebulk.string('VICE', value='Viceland') - rebulk.string('VMEO', value='Vimeo') - rebulk.string('VRV', value='VRV') - rebulk.string('WNET', value='W Network') - rebulk.string('WME', value='WatchMe') - rebulk.string('WWEN', value='WWE Network') - rebulk.string('XBOX', value='Xbox Video') - rebulk.string('YHOO', value='Yahoo') - rebulk.string('RED', value='YouTube Red') - rebulk.string('ZDF', value='ZDF') + for value, items in config.items(): + patterns = items if isinstance(items, list) else [items] + for pattern in patterns: + if pattern.startswith('re:'): + rebulk.regex(pattern, value=value) + else: + rebulk.string(pattern, value=value) rebulk.rules(ValidateStreamingService) @@ -161,7 +41,7 @@ def streaming_service(config): # pylint: disable=too-many-statements,unused-arg class ValidateStreamingService(Rule): """Validate streaming service matches.""" - priority = 32 + priority = 128 consequence = RemoveMatch def when(self, matches, context): diff --git a/libs/common/guessit/rules/properties/title.py b/libs/common/guessit/rules/properties/title.py index d1cafe2a..0d263016 100644 --- a/libs/common/guessit/rules/properties/title.py +++ b/libs/common/guessit/rules/properties/title.py @@ -8,7 +8,12 @@ from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, AppendTags from rebulk.formatters import formatters from .film import FilmTitleRule -from .language import SubtitlePrefixLanguageRule, SubtitleSuffixLanguageRule, SubtitleExtensionRule +from .language import ( + SubtitlePrefixLanguageRule, + SubtitleSuffixLanguageRule, + SubtitleExtensionRule, + NON_SPECIFIC_LANGUAGES +) from ..common import seps, title_seps from ..common.comparators import marker_sorted from ..common.expected import build_expected_function @@ -88,12 +93,19 @@ class TitleBaseRule(Rule): :rtype: """ cropped_holes = [] + group_markers = matches.markers.named('group') + for group_marker in group_markers: + path_marker = matches.markers.at_match(group_marker, predicate=lambda m: m.name == 'path', index=0) + if path_marker and path_marker.span == group_marker.span: + group_markers.remove(group_marker) + for hole in holes: - group_markers = matches.markers.named('group') cropped_holes.extend(hole.crop(group_markers)) + return cropped_holes - def is_ignored(self, match): + @staticmethod + def is_ignored(match): """ Ignore matches when scanning for title (hole). 
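The streaming_service loop just above, like the audio_channels loop earlier in this patch, replaces hand-written rebulk.string/rebulk.regex calls with patterns read from configuration: each canonical value maps to one pattern or a list of patterns, and a 're:' prefix marks a pattern as a regular expression rather than a literal string. A standalone sketch of that convention, where iter_patterns is a hypothetical helper and plain tuples stand in for rebulk registration:

    def iter_patterns(config):
        # Expand a property configuration into (kind, pattern, value) triples.
        for value, items in config.items():
            patterns = items if isinstance(items, list) else [items]
            for pattern in patterns:
                if pattern.startswith('re:'):
                    yield 'regex', pattern[len('re:'):], value
                else:
                    yield 'string', pattern, value

    sample = {
        'Comedy Central': ['CC', 're:Comedy-?Central'],
        'NBC': 'NBC',
    }

    for kind, pattern, value in iter_patterns(sample):
        print(kind, pattern, value)
    # string CC Comedy Central
    # regex Comedy-?Central Comedy Central
    # string NBC NBC

The sample mirrors the 'Comedy Central' and 'NBC' entries added to the JSON configuration earlier in this patch, so the config-driven loops yield the same matches the deleted hard-coded calls produced.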
@@ -130,7 +142,8 @@ class TitleBaseRule(Rule): for outside in outside_matches: other_languages.extend(matches.range(outside.start, outside.end, lambda c_match: c_match.name == match.name and - c_match not in to_keep)) + c_match not in to_keep and + c_match.value not in NON_SPECIFIC_LANGUAGES)) if not other_languages and (not starting or len(match.raw) <= 3): return True @@ -239,7 +252,7 @@ class TitleBaseRule(Rule): to_remove = [] if matches.named(self.match_name, lambda match: 'expected' in match.tags): - return ret, to_remove + return False fileparts = [filepart for filepart in list(marker_sorted(matches.markers.named('path'), matches)) if not self.filepart_filter or self.filepart_filter(filepart, matches)] @@ -272,7 +285,9 @@ class TitleBaseRule(Rule): ret.extend(titles) to_remove.extend(to_remove_c) - return ret, to_remove + if ret or to_remove: + return ret, to_remove + return False class TitleFromPosition(TitleBaseRule): @@ -329,4 +344,6 @@ class PreferTitleWithYear(Rule): for title_match in titles: if title_match.value not in title_values: to_remove.append(title_match) - return to_remove, to_tag + if to_remove or to_tag: + return to_remove, to_tag + return False diff --git a/libs/common/guessit/rules/properties/video_codec.py b/libs/common/guessit/rules/properties/video_codec.py index b08ddcae..842a03c7 100644 --- a/libs/common/guessit/rules/properties/video_codec.py +++ b/libs/common/guessit/rules/properties/video_codec.py @@ -3,9 +3,8 @@ """ video_codec and video_profile property """ -from rebulk.remodule import re - from rebulk import Rebulk, Rule, RemoveMatch +from rebulk.remodule import re from ..common import dash from ..common.pattern import is_disabled @@ -43,7 +42,8 @@ def video_codec(config): # pylint:disable=unused-argument # http://blog.mediacoderhq.com/h264-profiles-and-levels/ # https://en.wikipedia.org/wiki/H.264/MPEG-4_AVC - rebulk.defaults(name="video_profile", + rebulk.defaults(clear=True, + name="video_profile", validator=seps_surround, disabled=lambda context: is_disabled(context, 'video_profile')) @@ -66,7 +66,8 @@ def video_codec(config): # pylint:disable=unused-argument rebulk.string('DXVA', value='DXVA', name='video_api', disabled=lambda context: is_disabled(context, 'video_api')) - rebulk.defaults(name='color_depth', + rebulk.defaults(clear=True, + name='color_depth', validator=seps_surround, disabled=lambda context: is_disabled(context, 'color_depth')) rebulk.regex('12.?bits?', value='12-bit') diff --git a/libs/common/guessit/rules/properties/website.py b/libs/common/guessit/rules/properties/website.py index 00dfadd1..c1965311 100644 --- a/libs/common/guessit/rules/properties/website.py +++ b/libs/common/guessit/rules/properties/website.py @@ -67,7 +67,7 @@ def website(config): """ Validator for next website matches """ - return any(name in ['season', 'episode', 'year'] for name in match.names) + return match.named('season', 'episode', 'year') def when(self, matches, context): to_remove = [] @@ -80,7 +80,9 @@ def website(config): if not safe: suffix = matches.next(website_match, PreferTitleOverWebsite.valid_followers, 0) if suffix: - to_remove.append(website_match) + group = matches.markers.at_match(website_match, lambda marker: marker.name == 'group', 0) + if not group: + to_remove.append(website_match) return to_remove rebulk.rules(PreferTitleOverWebsite, ValidateWebsitePrefix) diff --git a/libs/common/guessit/test/enable_disable_properties.yml b/libs/common/guessit/test/enable_disable_properties.yml index 86c659d6..ada9c347 100644 --- 
a/libs/common/guessit/test/enable_disable_properties.yml +++ b/libs/common/guessit/test/enable_disable_properties.yml @@ -35,9 +35,9 @@ -cd: 1 -cd_count: 3 -? This.Is.Us +? This.is.Us : options: --exclude country - title: This Is Us + title: This is Us -country: US ? 2015.01.31 @@ -286,9 +286,9 @@ : options: --exclude website -website: wawa.co.uk -? movie.mkv +? movie.mp4 : options: --exclude mimetype - -mimetype: video/x-matroska + -mimetype: video/mp4 ? another movie.mkv : options: --exclude container diff --git a/libs/common/guessit/test/episodes.yml b/libs/common/guessit/test/episodes.yml index f7b5c3df..4bbbde4a 100644 --- a/libs/common/guessit/test/episodes.yml +++ b/libs/common/guessit/test/episodes.yml @@ -201,9 +201,9 @@ ? Series/My Name Is Earl/My.Name.Is.Earl.S01Extras.-.Bad.Karma.DVDRip.XviD.avi : title: My Name Is Earl season: 1 - episode_title: Extras - Bad Karma + episode_title: Bad Karma source: DVD - other: Rip + other: [Extras, Rip] video_codec: Xvid ? series/Freaks And Geeks/Season 1/Episode 4 - Kim Kelly Is My Friend-eng(1).srt @@ -1917,9 +1917,11 @@ ? Duck.Dynasty.S02E07.Streik.German.DOKU.DL.WS.DVDRiP.x264-CDP : episode: 7 - episode_title: Streik German + episode_title: Streik source: DVD - language: mul + language: + - German + - Multi other: [Documentary, Widescreen, Rip] release_group: CDP season: 2 @@ -1930,9 +1932,11 @@ ? Family.Guy.S13E14.JOLO.German.AC3D.DL.720p.WebHD.x264-CDD : audio_codec: Dolby Digital episode: 14 - episode_title: JOLO German + episode_title: JOLO source: Web - language: mul + language: + - German + - Multi release_group: CDD screen_size: 720p season: 13 @@ -3025,7 +3029,7 @@ title: Show Name episode: [493, 494, 495, 496, 497, 498, 500, 501, 502, 503, 504, 505, 506, 507] screen_size: 720p - subtitle_language: fr + other: Variable Frame Rate video_codec: H.264 audio_codec: AAC type: episode @@ -4524,4 +4528,166 @@ video_codec: H.264 audio_codec: MP2 release_group: KIDKAT + type: episode + +? Por Trece Razones - Temporada 2 [HDTV 720p][Cap.201][AC3 5.1 Castellano]/Por Trece Razones 2x01 [des202].mkv +: title: Por Trece Razones + season: 2 + source: HDTV + screen_size: 720p + episode: 1 + audio_codec: Dolby Digital + audio_channels: '5.1' + language: Catalan + release_group: des202 + container: mkv + type: episode + +? Cuerpo de Elite - Temporada 1 [HDTV 720p][Cap.113][AC3 5.1 Esp Castellano]\CuerpoDeElite720p_113_desca202.mkv +: title: Cuerpo de Elite + season: 1 + source: HDTV + screen_size: 720p + episode: 13 + audio_codec: Dolby Digital + audio_channels: '5.1' + language: + - Spanish + - Catalan + container: mkv + type: episode + +? Show.Name.S01E01.St.Patricks.Day.1080p.mkv +: title: Show Name + season: 1 + episode: 1 + episode_title: St Patricks Day + screen_size: 1080p + container: mkv + type: episode + +? Show.Name.S01E01.St.Patricks.Day.1080p-grp.mkv +: title: Show Name + season: 1 + episode: 1 + episode_title: St Patricks Day + screen_size: 1080p + release_group: grp + container: mkv + type: episode + +? Titans.2018.S01E09.Hank.And.Dawn.720p.DCU.WEB-DL.AAC2.0.H264-NTb +: title: Titans + year: 2018 + season: 1 + episode: 9 + episode_title: Hank And Dawn + screen_size: 720p + streaming_service: DC Universe + source: Web + audio_codec: AAC + audio_channels: '2.0' + video_codec: H.264 + release_group: NTb + type: episode + +? S.W.A.T.2017.S01E21.Treibjagd.German.Dubbed.DL.AmazonHD.x264-TVS +: title: S.W.A.T. 
+ year: 2017 + season: 1 + episode: 21 + episode_title: Treibjagd + language: + - German + - Multi + streaming_service: Amazon Prime + other: HD + video_codec: H.264 + release_group: TVS + type: episode + +? S.W.A.T.2017.S01E16.READNFO.720p.HDTV.x264-KILLERS +: title: S.W.A.T. + year: 2017 + season: 1 + episode: 16 + other: Read NFO + screen_size: 720p + source: HDTV + video_codec: H.264 + release_group: KILLERS + type: episode + +? /mnt/NAS/NoSubsTVShows/Babylon 5/Season 01/Ep. 02 - Soul Hunter +: title: Babylon 5 + season: 1 + episode: 2 + episode_title: Soul Hunter + type: episode + +? This.is.Us.S01E01.HDTV.x264-KILLERS.mkv +: title: This is Us + season: 1 + episode: 1 + source: HDTV + video_codec: H.264 + release_group: KILLERS + container: mkv + type: episode + +? Videos/Office1080/The Office (US) (2005) Season 2 S02 + Extras (1080p AMZN WEB-DL x265 HEVC 10bit AAC 2.0 LION)/The Office (US) (2005) - S02E12 - The Injury (1080p AMZN WEB-DL x265 LION).mkv +: title: The Office + country: US + year: 2005 + season: 2 + other: Extras + screen_size: 1080p + streaming_service: Amazon Prime + source: Web + video_codec: H.265 + video_profile: High Efficiency Video Coding + color_depth: 10-bit + audio_codec: AAC + audio_channels: '2.0' + release_group: LION + episode: 12 + episode_title: The Injury + container: mkv + type: episode + +? Thumping.Spike.2.E01.DF.WEBRip.720p-DRAMATV.mp4 +: title: Thumping Spike 2 + episode: 1 + source: Web + other: Rip + screen_size: 720p + streaming_service: DramaFever + release_group: DRAMATV + container: mp4 + mimetype: video/mp4 + type: episode + +? About.Time.E01.1080p.VIKI.WEB-DL-BLUEBERRY.mp4 +: title: About Time + episode: 1 + screen_size: 1080p + streaming_service: Viki + source: Web + release_group: BLUEBERRY + container: mp4 + mimetype: video/mp4 + type: episode + +? Eyes.Of.Dawn.1991.E01.480p.MBCVOD.AAC.x264-NOGPR.mp4 +: title: Eyes Of Dawn + year: 1991 + season: 1991 + episode: 1 + screen_size: 480p + streaming_service: MBC + audio_codec: AAC + video_codec: H.264 + release_group: NOGPR + container: mp4 + mimetype: video/mp4 type: episode \ No newline at end of file diff --git a/libs/common/guessit/test/movies.yml b/libs/common/guessit/test/movies.yml index 642012a9..a534ca0f 100644 --- a/libs/common/guessit/test/movies.yml +++ b/libs/common/guessit/test/movies.yml @@ -815,10 +815,12 @@ ? Das.Appartement.German.AC3D.DL.720p.BluRay.x264-TVP : audio_codec: Dolby Digital source: Blu-ray - language: mul + language: + - German + - Multi release_group: TVP screen_size: 720p - title: Das Appartement German + title: Das Appartement type: movie video_codec: H.264 @@ -1723,7 +1725,7 @@ ? Ant-Man.and.the.Wasp.2018.Digital.Extras.1080p.AMZN.WEB-DL.DDP5.1.H.264-NTG.mkv : title: Ant-Man and the Wasp year: 2018 - alternative_title: Digital Extras + other: Extras screen_size: 1080p streaming_service: Amazon Prime source: Web @@ -1770,4 +1772,15 @@ audio_channels: '5.1' video_codec: H.264 release_group: CMRG - type: movie \ No newline at end of file + type: movie + +? The.Girl.in.the.Spiders.Web.2019.1080p.WEB-DL.x264.AC3-EVO.mkv +: title: The Girl in the Spiders Web + year: 2019 + screen_size: 1080p + source: Web + video_codec: H.264 + audio_codec: Dolby Digital + release_group: EVO + container: mkv + type: movie diff --git a/libs/common/guessit/test/rules/common_words.yml b/libs/common/guessit/test/rules/common_words.yml new file mode 100644 index 00000000..d403a457 --- /dev/null +++ b/libs/common/guessit/test/rules/common_words.yml @@ -0,0 +1,467 @@ +? 
is +: title: is + +? it +: title: it + +? am +: title: am + +? mad +: title: mad + +? men +: title: men + +? man +: title: man + +? run +: title: run + +? sin +: title: sin + +? st +: title: st + +? to +: title: to + +? 'no' +: title: 'no' + +? non +: title: non + +? war +: title: war + +? min +: title: min + +? new +: title: new + +? car +: title: car + +? day +: title: day + +? bad +: title: bad + +? bat +: title: bat + +? fan +: title: fan + +? fry +: title: fry + +? cop +: title: cop + +? zen +: title: zen + +? gay +: title: gay + +? fat +: title: fat + +? one +: title: one + +? cherokee +: title: cherokee + +? got +: title: got + +? an +: title: an + +? as +: title: as + +? cat +: title: cat + +? her +: title: her + +? be +: title: be + +? hat +: title: hat + +? sun +: title: sun + +? may +: title: may + +? my +: title: my + +? mr +: title: mr + +? rum +: title: rum + +? pi +: title: pi + +? bb +: title: bb + +? bt +: title: bt + +? tv +: title: tv + +? aw +: title: aw + +? by +: title: by + +? md +: other: Mic Dubbed + +? mp +: title: mp + +? cd +: title: cd + +? in +: title: in + +? ad +: title: ad + +? ice +: title: ice + +? ay +: title: ay + +? at +: title: at + +? star +: title: star + +? so +: title: so + +? he +: title: he + +? do +: title: do + +? ax +: title: ax + +? mx +: title: mx + +? bas +: title: bas + +? de +: title: de + +? le +: title: le + +? son +: title: son + +? ne +: title: ne + +? ca +: title: ca + +? ce +: title: ce + +? et +: title: et + +? que +: title: que + +? mal +: title: mal + +? est +: title: est + +? vol +: title: vol + +? or +: title: or + +? mon +: title: mon + +? se +: title: se + +? je +: title: je + +? tu +: title: tu + +? me +: title: me + +? ma +: title: ma + +? va +: title: va + +? au +: country: AU + +? lu +: title: lu + +? wa +: title: wa + +? ga +: title: ga + +? ao +: title: ao + +? la +: title: la + +? el +: title: el + +? del +: title: del + +? por +: title: por + +? mar +: title: mar + +? al +: title: al + +? un +: title: un + +? ind +: title: ind + +? arw +: title: arw + +? ts +: source: Telesync + +? ii +: title: ii + +? bin +: title: bin + +? chan +: title: chan + +? ss +: title: ss + +? san +: title: san + +? oss +: title: oss + +? iii +: title: iii + +? vi +: title: vi + +? ben +: title: ben + +? da +: title: da + +? lt +: title: lt + +? ch +: title: ch + +? sr +: title: sr + +? ps +: title: ps + +? cx +: title: cx + +? vo +: title: vo + +? mkv +: container: mkv + +? avi +: container: avi + +? dmd +: title: dmd + +? the +: title: the + +? dis +: title: dis + +? cut +: title: cut + +? stv +: title: stv + +? des +: title: des + +? dia +: title: dia + +? and +: title: and + +? cab +: title: cab + +? sub +: title: sub + +? mia +: title: mia + +? rim +: title: rim + +? las +: title: las + +? une +: title: une + +? par +: title: par + +? srt +: container: srt + +? ano +: title: ano + +? toy +: title: toy + +? job +: title: job + +? gag +: title: gag + +? reel +: title: reel + +? www +: title: www + +? for +: title: for + +? ayu +: title: ayu + +? csi +: title: csi + +? ren +: title: ren + +? moi +: title: moi + +? sur +: title: sur + +? fer +: title: fer + +? fun +: title: fun + +? two +: title: two + +? big +: title: big + +? psy +: title: psy + +? air +: title: air + +? brazil +: title: brazil + +? jordan +: title: jordan + +? bs +: title: bs + +? kz +: title: kz + +? gt +: title: gt + +? im +: title: im + +? pt +: language: pt + +? scr +: title: scr + +? sd +: title: sd + +? 
hr +: other: High Resolution diff --git a/libs/common/guessit/test/rules/country.yml b/libs/common/guessit/test/rules/country.yml index 76383180..b3d4d8f1 100644 --- a/libs/common/guessit/test/rules/country.yml +++ b/libs/common/guessit/test/rules/country.yml @@ -5,8 +5,8 @@ : country: US title: this is title -? This.is.us.title -: title: This is us title +? This.is.Us +: title: This is Us ? This.Is.Us : options: --no-default-config diff --git a/libs/common/guessit/test/rules/other.yml b/libs/common/guessit/test/rules/other.yml index e2bea6e7..447f1787 100644 --- a/libs/common/guessit/test/rules/other.yml +++ b/libs/common/guessit/test/rules/other.yml @@ -48,7 +48,7 @@ proper_count: 3 -? Proper +? Proper.720p ? +Repack ? +Rerip : other: Proper @@ -80,7 +80,7 @@ ? Remux : other: Remux -? 3D +? 3D.2019 : other: 3D ? HD diff --git a/libs/common/guessit/test/suggested.json b/libs/common/guessit/test/suggested.json new file mode 100644 index 00000000..dc838ad0 --- /dev/null +++ b/libs/common/guessit/test/suggested.json @@ -0,0 +1,21 @@ +{ + "titles": [ + "13 Reasons Why", + "Star Wars: Episode VII - The Force Awakens", + "3%", + "The 100", + "3 Percent", + "This is Us", + "Open Season 2", + "Game of Thrones", + "The X-Files", + "11.22.63" + ], + "suggested": [ + "13 Reasons Why", + "Star Wars: Episode VII - The Force Awakens", + "The 100", + "Open Season 2", + "11.22.63" + ] +} \ No newline at end of file diff --git a/libs/common/guessit/test/test_api.py b/libs/common/guessit/test/test_api.py index 9abb84d9..391dbced 100644 --- a/libs/common/guessit/test/test_api.py +++ b/libs/common/guessit/test/test_api.py @@ -1,13 +1,14 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name, pointless-string-statement - +import json import os +import sys import pytest import six -from ..api import guessit, properties, GuessitException +from ..api import guessit, properties, suggested_expected, GuessitException __location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))) @@ -27,12 +28,16 @@ def test_forced_binary(): assert ret and 'title' in ret and isinstance(ret['title'], six.binary_type) -@pytest.mark.skipif('sys.version_info < (3, 4)', reason="Path is not available") +@pytest.mark.skipif(sys.version_info < (3, 4), reason="Path is not available") def test_pathlike_object(): - from pathlib import Path - path = Path('Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv') - ret = guessit(path) - assert ret and 'title' in ret + try: + from pathlib import Path + + path = Path('Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv') + ret = guessit(path) + assert ret and 'title' in ret + except ImportError: # pragma: no-cover + pass def test_unicode_japanese(): @@ -69,3 +74,10 @@ def test_exception(): assert "An internal error has occured in guessit" in str(excinfo.value) assert "Guessit Exception Report" in str(excinfo.value) assert "Please report at https://github.com/guessit-io/guessit/issues" in str(excinfo.value) + + +def test_suggested_expected(): + with open(os.path.join(__location__, 'suggested.json'), 'r') as f: + content = json.load(f) + actual = suggested_expected(content['titles']) + assert actual == content['suggested'] diff --git a/libs/common/guessit/test/test_yml.py b/libs/common/guessit/test/test_yml.py index 4f58a056..040796de 100644 --- a/libs/common/guessit/test/test_yml.py +++ b/libs/common/guessit/test/test_yml.py @@ -7,9 +7,8 @@ import os from io 
import open # pylint: disable=redefined-builtin import babelfish -import pytest -import six -import yaml +import six # pylint:disable=wrong-import-order +import yaml # pylint:disable=wrong-import-order from rebulk.remodule import re from rebulk.utils import is_iterable @@ -21,13 +20,6 @@ logger = logging.getLogger(__name__) __location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))) -filename_predicate = None -string_predicate = None - - -# filename_predicate = lambda filename: 'episode_title' in filename -# string_predicate = lambda string: '-DVD.BlablaBla.Fix.Blablabla.XVID' in string - class EntryResult(object): def __init__(self, string, negates=False): @@ -134,7 +126,49 @@ class TestYml(object): options_re = re.compile(r'^([ +-]+)(.*)') - files, ids = files_and_ids(filename_predicate) + def _get_unique_id(self, collection, base_id): + ret = base_id + i = 2 + while ret in collection: + suffix = "-" + str(i) + ret = base_id + suffix + i += 1 + return ret + + def pytest_generate_tests(self, metafunc): + if 'yml_test_case' in metafunc.fixturenames: + entries = [] + entry_ids = [] + entry_set = set() + + for filename, _ in zip(*files_and_ids()): + with open(os.path.join(__location__, filename), 'r', encoding='utf-8') as infile: + data = yaml.load(infile, OrderedDictYAMLLoader) + + last_expected = None + for string, expected in reversed(list(data.items())): + if expected is None: + data[string] = last_expected + else: + last_expected = expected + + default = None + try: + default = data['__default__'] + del data['__default__'] + except KeyError: + pass + + for string, expected in data.items(): + TestYml.set_default(expected, default) + string = TestYml.fix_encoding(string, expected) + + entries.append((filename, string, expected)) + unique_id = self._get_unique_id(entry_set, '[' + filename + '] ' + str(string)) + entry_set.add(unique_id) + entry_ids.append(unique_id) + + metafunc.parametrize('yml_test_case', entries, ids=entry_ids) @staticmethod def set_default(expected, default): @@ -143,34 +177,8 @@ class TestYml(object): if k not in expected: expected[k] = v - @pytest.mark.parametrize('filename', files, ids=ids) - def test(self, filename, caplog): - caplog.set_level(logging.INFO) - with open(os.path.join(__location__, filename), 'r', encoding='utf-8') as infile: - data = yaml.load(infile, OrderedDictYAMLLoader) - entries = Results() - - last_expected = None - for string, expected in reversed(list(data.items())): - if expected is None: - data[string] = last_expected - else: - last_expected = expected - - default = None - try: - default = data['__default__'] - del data['__default__'] - except KeyError: - pass - - for string, expected in data.items(): - TestYml.set_default(expected, default) - entry = self.check_data(filename, string, expected) - entries.append(entry) - entries.assert_ok() - - def check_data(self, filename, string, expected): + @classmethod + def fix_encoding(cls, string, expected): if six.PY2: if isinstance(string, six.text_type): string = string.encode('utf-8') @@ -183,16 +191,23 @@ class TestYml(object): expected[k] = v if not isinstance(string, str): string = str(string) - if not string_predicate or string_predicate(string): # pylint: disable=not-callable - entry = self.check(string, expected) - if entry.ok: - logger.debug('[%s] %s', filename, entry) - elif entry.warning: - logger.warning('[%s] %s', filename, entry) - elif entry.error: - logger.error('[%s] %s', filename, entry) - for line in entry.details: - logger.error('[%s] %s', filename, ' 
' * 4 + line) + return string + + def test_entry(self, yml_test_case): + filename, string, expected = yml_test_case + result = self.check_data(filename, string, expected) + assert not result.error + + def check_data(self, filename, string, expected): + entry = self.check(string, expected) + if entry.ok: + logger.debug('[%s] %s', filename, entry) + elif entry.warning: + logger.warning('[%s] %s', filename, entry) + elif entry.error: + logger.error('[%s] %s', filename, entry) + for line in entry.details: + logger.error('[%s] %s', filename, ' ' * 4 + line) return entry def check(self, string, expected): diff --git a/libs/common/guessit/test/various.yml b/libs/common/guessit/test/various.yml index 5e689e0b..6fb58deb 100644 --- a/libs/common/guessit/test/various.yml +++ b/libs/common/guessit/test/various.yml @@ -946,3 +946,254 @@ source: Blu-ray audio_codec: DTS-HD type: movie + +? Mr Robot - S03E01 - eps3 0 power-saver-mode h (1080p AMZN WEB-DL x265 HEVC 10bit EAC3 6.0 RCVR).mkv +: title: Mr Robot + season: 3 + episode: 1 + episode_title: eps3 0 power-saver-mode h + screen_size: 1080p + streaming_service: Amazon Prime + source: Web + video_codec: H.265 + video_profile: High Efficiency Video Coding + color_depth: 10-bit + audio_codec: Dolby Digital Plus + audio_channels: '5.1' + release_group: RCVR + container: mkv + type: episode + +? Panorama.15-05-2018.Web-DL.540p.H264.AAC.Subs.mp4 +: title: Panorama + date: 2018-05-15 + source: Web + screen_size: 540p + video_codec: H.264 + audio_codec: AAC + subtitle_language: und + container: mp4 + type: episode + +? Shaolin 2011.720p.BluRay.x264-x0r.mkv +: title: Shaolin + year: 2011 + screen_size: 720p + source: Blu-ray + video_codec: H.264 + release_group: x0r + container: mkv + type: movie + +? '[ Engineering Catastrophes S02E10 1080p AMZN WEB-DL DD+ 2.0 x264-TrollHD ]' +: title: Engineering Catastrophes + season: 2 + episode: 10 + screen_size: 1080p + streaming_service: Amazon Prime + source: Web + audio_codec: Dolby Digital Plus + audio_channels: '2.0' + video_codec: H.264 + release_group: TrollHD + type: episode + +? A Very Harold & Kumar 3D Christmas (2011).mkv +: title: A Very Harold & Kumar 3D Christmas + year: 2011 + container: mkv + type: movie + +? Cleveland.Hustles.S01E03.Downward.Dogs.and.Proper.Pigs.720p.HDTV.x264-W4F +: title: Cleveland Hustles + season: 1 + episode: 3 + episode_title: Downward Dogs and Proper Pigs + screen_size: 720p + source: HDTV + video_codec: H.264 + release_group: W4F + type: episode + +? Pawn.Stars.S12E20.The.Pawn.Awakens.REAL.READ.NFO.720p.HDTV.x264-DHD +: title: Pawn Stars + season: 12 + episode: 20 + episode_title: The Pawn Awakens + other: + - Proper + - Read NFO + proper_count: 2 + screen_size: 720p + source: HDTV + video_codec: H.264 + release_group: DHD + type: episode + +? Pawn.Stars.S12E22.Racing.Revolution.REAL.720p.HDTV.x264-DHD +: title: Pawn Stars + season: 12 + episode: 22 + episode_title: Racing Revolution + other: Proper + proper_count: 2 + screen_size: 720p + source: HDTV + video_codec: H.264 + release_group: DHD + type: episode + +? Luksusfellen.S18E02.REAL.NORWEGiAN.720p.WEB.h264-NORPiLT +: title: Luksusfellen + season: 18 + episode: 2 + other: Proper + proper_count: 2 + language: Norwegian + screen_size: 720p + source: Web + video_codec: H.264 + release_group: NORPiLT + type: episode + +? 
The.Exorcist.S02E07.REAL.FRENCH.720p.HDTV.x264-SH0W +: title: The Exorcist + season: 2 + episode: 7 + other: Proper + proper_count: 2 + language: fr + screen_size: 720p + source: HDTV + video_codec: H.264 + release_group: SH0W + type: episode + +? Outrageous.Acts.of.Science.S05E02.Is.This.for.Real.720p.HDTV.x264-DHD +: title: Outrageous Acts of Science + season: 5 + episode: 2 +# corner case +# episode_title: Is This for Real + screen_size: 720p + source: HDTV + video_codec: H.264 + release_group: DHD + type: episode + +? How.the.Universe.Works.S06E08.Strange.Lives.of.Dwarf.Planets.REAL.720p.WEB.x264-DHD +: title: How the Universe Works + season: 6 + episode: 8 + episode_title: Strange Lives of Dwarf Planets + other: Proper + proper_count: 2 + screen_size: 720p + source: Web + video_codec: H.264 + release_group: DHD + type: episode + +? Vampirina.S01E16.REAL.HDTV.x264-W4F +: title: Vampirina + season: 1 + episode: 16 + other: Proper + proper_count: 2 + source: HDTV + video_codec: H.264 + release_group: W4F + type: episode + +? Test.S01E16.Some Real Episode Title.HDTV.x264-W4F +: title: Test + season: 1 + episode: 16 + episode_title: Some Real Episode Title + source: HDTV + video_codec: H.264 + release_group: W4F + type: episode + +? NOS4A2.S01E01.The.Shorter.Way.REPACK.720p.AMZN.WEB-DL.DDP5.1.H.264-NTG.mkv +: title: NOS4A2 + season: 1 + episode: 1 + episode_title: The Shorter Way + other: Proper + proper_count: 1 + screen_size: 720p + streaming_service: Amazon Prime + source: Web + audio_codec: Dolby Digital Plus + audio_channels: '5.1' + video_codec: H.264 + release_group: NTG + container: mkv + type: episode + +? Star Trek DS9 Ep 2x03 The Siege (Part III) +: title: Star Trek DS9 + season: 2 + episode: 3 + episode_title: The Siege + part: 3 + type: episode + +? The.Red.Line.S01E01 +: title: The Red Line + season: 1 + episode: 1 + type: episode + +? Show.S01E01.WEB.x264-METCON.mkv +: title: Show + season: 1 + episode: 1 + source: Web + video_codec: H.264 + release_group: METCON + container: mkv + type: episode + +? Show.S01E01.WEB.x264-TCMEON.mkv +: title: Show + season: 1 + episode: 1 + source: Web + video_codec: H.264 + release_group: TCMEON + container: mkv + type: episode + +? Show.S01E01.WEB.x264-MEONTC.mkv +: title: Show + season: 1 + episode: 1 + source: Web + video_codec: H.264 + release_group: MEONTC + container: mkv + type: episode + +? '[TorrentCouch.com].Westworld.S02.Complete.720p.WEB-DL.x264.[MP4].[5.3GB].[Season.2.Full]/[TorrentCouch.com].Westworld.S02E03.720p.WEB-DL.x264.mp4' +: website: TorrentCouch.com + title: Westworld + season: 2 + other: Complete + screen_size: 720p + source: Web + video_codec: H.264 + container: mp4 + size: 5.3GB + episode: 3 + type: episode + +? Vita.&.Virginia.2018.720p.H.264.YTS.LT.mp4 +: title: Vita & Virginia + year: 2018 + screen_size: 720p + video_codec: H.264 + release_group: YTS.LT + container: mp4 + type: movie \ No newline at end of file diff --git a/libs/common/guessit/yamlutils.py b/libs/common/guessit/yamlutils.py index 01ac7778..d04be641 100644 --- a/libs/common/guessit/yamlutils.py +++ b/libs/common/guessit/yamlutils.py @@ -10,19 +10,19 @@ except ImportError: # pragma: no-cover from ordereddict import OrderedDict # pylint:disable=import-error import babelfish -import yaml +import yaml # pylint:disable=wrong-import-order from .rules.common.quantity import BitRate, FrameRate, Size -class OrderedDictYAMLLoader(yaml.Loader): +class OrderedDictYAMLLoader(yaml.SafeLoader): """ A YAML loader that loads mappings into ordered dictionaries. 
From https://gist.github.com/enaeseth/844388 """ def __init__(self, *args, **kwargs): - yaml.Loader.__init__(self, *args, **kwargs) + yaml.SafeLoader.__init__(self, *args, **kwargs) self.add_constructor(u'tag:yaml.org,2002:map', type(self).construct_yaml_map) self.add_constructor(u'tag:yaml.org,2002:omap', type(self).construct_yaml_map) @@ -58,7 +58,7 @@ class CustomDumper(yaml.SafeDumper): """ Custom YAML Dumper. """ - pass + pass # pylint:disable=unnecessary-pass def default_representer(dumper, data): diff --git a/libs/common/rebulk/__version__.py b/libs/common/rebulk/__version__.py index 1f96b77a..939c554c 100644 --- a/libs/common/rebulk/__version__.py +++ b/libs/common/rebulk/__version__.py @@ -4,4 +4,4 @@ Version module """ # pragma: no cover -__version__ = '1.0.0' +__version__ = '2.0.1' diff --git a/libs/common/rebulk/builder.py b/libs/common/rebulk/builder.py new file mode 100644 index 00000000..c91420aa --- /dev/null +++ b/libs/common/rebulk/builder.py @@ -0,0 +1,217 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Base builder class for Rebulk +""" +from abc import ABCMeta, abstractmethod +from copy import deepcopy +from logging import getLogger + +from six import add_metaclass + +from .loose import set_defaults +from .pattern import RePattern, StringPattern, FunctionalPattern + +log = getLogger(__name__).log + + +@add_metaclass(ABCMeta) +class Builder(object): + """ + Base builder class for patterns + """ + + def __init__(self): + self._defaults = {} + self._regex_defaults = {} + self._string_defaults = {} + self._functional_defaults = {} + self._chain_defaults = {} + + def reset(self): + """ + Reset all defaults. + + :return: + """ + self.__init__() + + def defaults(self, **kwargs): + """ + Define default keyword arguments for all patterns + :param kwargs: + :type kwargs: + :return: + :rtype: + """ + set_defaults(kwargs, self._defaults, override=True) + return self + + def regex_defaults(self, **kwargs): + """ + Define default keyword arguments for regular expression patterns. + :param kwargs: + :type kwargs: + :return: + :rtype: + """ + set_defaults(kwargs, self._regex_defaults, override=True) + return self + + def string_defaults(self, **kwargs): + """ + Define default keyword arguments for string patterns. + :param kwargs: + :type kwargs: + :return: + :rtype: + """ + set_defaults(kwargs, self._string_defaults, override=True) + return self + + def functional_defaults(self, **kwargs): + """ + Define default keyword arguments for functional patterns. + :param kwargs: + :type kwargs: + :return: + :rtype: + """ + set_defaults(kwargs, self._functional_defaults, override=True) + return self + + def chain_defaults(self, **kwargs): + """ + Define default keyword arguments for patterns chain.
+ :param kwargs: + :type kwargs: + :return: + :rtype: + """ + set_defaults(kwargs, self._chain_defaults, override=True) + return self + + def build_re(self, *pattern, **kwargs): + """ + Builds a new regular expression pattern + + :param pattern: + :type pattern: + :param kwargs: + :type kwargs: + :return: + :rtype: + """ + set_defaults(self._regex_defaults, kwargs) + set_defaults(self._defaults, kwargs) + return RePattern(*pattern, **kwargs) + + def build_string(self, *pattern, **kwargs): + """ + Builds a new string pattern + + :param pattern: + :type pattern: + :param kwargs: + :type kwargs: + :return: + :rtype: + """ + set_defaults(self._string_defaults, kwargs) + set_defaults(self._defaults, kwargs) + return StringPattern(*pattern, **kwargs) + + def build_functional(self, *pattern, **kwargs): + """ + Builds a new functional pattern + + :param pattern: + :type pattern: + :param kwargs: + :type kwargs: + :return: + :rtype: + """ + set_defaults(self._functional_defaults, kwargs) + set_defaults(self._defaults, kwargs) + return FunctionalPattern(*pattern, **kwargs) + + def build_chain(self, **kwargs): + """ + Builds a new patterns chain + + :param pattern: + :type pattern: + :param kwargs: + :type kwargs: + :return: + :rtype: + """ + from .chain import Chain + set_defaults(self._chain_defaults, kwargs) + set_defaults(self._defaults, kwargs) + chain = Chain(self, **kwargs) + chain._defaults = deepcopy(self._defaults) # pylint: disable=protected-access + chain._regex_defaults = deepcopy(self._regex_defaults) # pylint: disable=protected-access + chain._functional_defaults = deepcopy(self._functional_defaults) # pylint: disable=protected-access + chain._string_defaults = deepcopy(self._string_defaults) # pylint: disable=protected-access + chain._chain_defaults = deepcopy(self._chain_defaults) # pylint: disable=protected-access + return chain + + @abstractmethod + def pattern(self, *pattern): + """ + Register a list of Pattern instances + :param pattern: + :return: + """ + pass + + def regex(self, *pattern, **kwargs): + """ + Add re pattern + + :param pattern: + :type pattern: + :return: self + :rtype: Rebulk + """ + return self.pattern(self.build_re(*pattern, **kwargs)) + + def string(self, *pattern, **kwargs): + """ + Add string pattern + + :param pattern: + :type pattern: + :return: self + :rtype: Rebulk + """ + return self.pattern(self.build_string(*pattern, **kwargs)) + + def functional(self, *pattern, **kwargs): + """ + Add functional pattern + + :param pattern: + :type pattern: + :return: self + :rtype: Rebulk + """ + functional = self.build_functional(*pattern, **kwargs) + return self.pattern(functional) + + def chain(self, **kwargs): + """ + Add patterns chain, using configuration of this rebulk + + :param pattern: + :type pattern: + :param kwargs: + :type kwargs: + :return: + :rtype: + """ + chain = self.build_chain(**kwargs) + self.pattern(chain) + return chain diff --git a/libs/common/rebulk/chain.py b/libs/common/rebulk/chain.py index dfb6ea44..ba31ec9a 100644 --- a/libs/common/rebulk/chain.py +++ b/libs/common/rebulk/chain.py @@ -6,9 +6,10 @@ Chain patterns and handle repeating capture group # pylint: disable=super-init-not-called import itertools -from .loose import call, set_defaults +from .builder import Builder +from .loose import call from .match import Match, Matches -from .pattern import Pattern, filter_match_kwargs +from .pattern import Pattern, filter_match_kwargs, BasePattern from .remodule import re @@ -19,150 +20,46 @@ class _InvalidChainException(Exception): pass -class 
Chain(Pattern): +class Chain(Pattern, Builder): """ Definition of a pattern chain to search for. """ - def __init__(self, rebulk, chain_breaker=None, **kwargs): - call(super(Chain, self).__init__, **kwargs) + def __init__(self, parent, chain_breaker=None, **kwargs): + Builder.__init__(self) + call(Pattern.__init__, self, **kwargs) self._kwargs = kwargs self._match_kwargs = filter_match_kwargs(kwargs) - self._defaults = {} - self._regex_defaults = {} - self._string_defaults = {} - self._functional_defaults = {} if callable(chain_breaker): self.chain_breaker = chain_breaker else: self.chain_breaker = None - self.rebulk = rebulk + self.parent = parent self.parts = [] - def defaults(self, **kwargs): + def pattern(self, *pattern): """ - Define default keyword arguments for all patterns - :param kwargs: - :type kwargs: - :return: - :rtype: - """ - self._defaults = kwargs - return self - - def regex_defaults(self, **kwargs): - """ - Define default keyword arguments for functional patterns. - :param kwargs: - :type kwargs: - :return: - :rtype: - """ - self._regex_defaults = kwargs - return self - - def string_defaults(self, **kwargs): - """ - Define default keyword arguments for string patterns. - :param kwargs: - :type kwargs: - :return: - :rtype: - """ - self._string_defaults = kwargs - return self - - def functional_defaults(self, **kwargs): - """ - Define default keyword arguments for functional patterns. - :param kwargs: - :type kwargs: - :return: - :rtype: - """ - self._functional_defaults = kwargs - return self - - def chain(self): - """ - Add patterns chain, using configuration from this chain - - :return: - :rtype: - """ - # pylint: disable=protected-access - chain = self.rebulk.chain(**self._kwargs) - chain._defaults = dict(self._defaults) - chain._regex_defaults = dict(self._regex_defaults) - chain._functional_defaults = dict(self._functional_defaults) - chain._string_defaults = dict(self._string_defaults) - return chain - - def regex(self, *pattern, **kwargs): - """ - Add re pattern :param pattern: - :type pattern: - :param kwargs: - :type kwargs: :return: - :rtype: """ - set_defaults(self._kwargs, kwargs) - set_defaults(self._regex_defaults, kwargs) - set_defaults(self._defaults, kwargs) - pattern = self.rebulk.build_re(*pattern, **kwargs) - part = ChainPart(self, pattern) - self.parts.append(part) - return part - - def functional(self, *pattern, **kwargs): - """ - Add functional pattern - - :param pattern: - :type pattern: - :param kwargs: - :type kwargs: - :return: - :rtype: - """ - set_defaults(self._kwargs, kwargs) - set_defaults(self._functional_defaults, kwargs) - set_defaults(self._defaults, kwargs) - pattern = self.rebulk.build_functional(*pattern, **kwargs) - part = ChainPart(self, pattern) - self.parts.append(part) - return part - - def string(self, *pattern, **kwargs): - """ - Add string pattern - - :param pattern: - :type pattern: - :param kwargs: - :type kwargs: - :return: - :rtype: - """ - set_defaults(self._kwargs, kwargs) - set_defaults(self._functional_defaults, kwargs) - set_defaults(self._defaults, kwargs) - pattern = self.rebulk.build_string(*pattern, **kwargs) - part = ChainPart(self, pattern) + if not pattern: + raise ValueError("One pattern should be given to the chain") + if len(pattern) > 1: + raise ValueError("Only one pattern can be given to the chain") + part = ChainPart(self, pattern[0]) self.parts.append(part) return part def close(self): """ - Close chain builder to continue registering other pattern - - :return: - :rtype: + Deeply close the chain + 
:return: Rebulk instance """ - return self.rebulk + parent = self.parent + while isinstance(parent, Chain): + parent = parent.parent + return parent def _match(self, pattern, input_string, context=None): # pylint: disable=too-many-locals,too-many-nested-blocks @@ -173,42 +70,20 @@ class Chain(Pattern): chain_found = False current_chain_matches = [] valid_chain = True - is_chain_start = True for chain_part in self.parts: try: - chain_part_matches, raw_chain_part_matches = Chain._match_chain_part(is_chain_start, chain_part, - chain_input_string, - context) - - Chain._fix_matches_offset(chain_part_matches, input_string, offset) - Chain._fix_matches_offset(raw_chain_part_matches, input_string, offset) - - if raw_chain_part_matches: - grouped_matches_dict = dict() - for match_index, match in itertools.groupby(chain_part_matches, - lambda m: m.match_index): - grouped_matches_dict[match_index] = list(match) - - grouped_raw_matches_dict = dict() - for match_index, raw_match in itertools.groupby(raw_chain_part_matches, - lambda m: m.match_index): - grouped_raw_matches_dict[match_index] = list(raw_match) - - for match_index, grouped_raw_matches in grouped_raw_matches_dict.items(): - chain_found = True - offset = grouped_raw_matches[-1].raw_end - chain_input_string = input_string[offset:] - if not chain_part.is_hidden: - grouped_matches = grouped_matches_dict.get(match_index, []) - if self._chain_breaker_eval(current_chain_matches + grouped_matches): - current_chain_matches.extend(grouped_matches) + chain_part_matches, raw_chain_part_matches = chain_part.matches(chain_input_string, + context, + with_raw_matches=True) + chain_found, chain_input_string, offset = \ + self._to_next_chain_part(chain_part, chain_part_matches, raw_chain_part_matches, chain_found, + input_string, chain_input_string, offset, current_chain_matches) except _InvalidChainException: valid_chain = False if current_chain_matches: offset = current_chain_matches[0].raw_end break - is_chain_start = False if not chain_found: break if current_chain_matches and valid_chain: @@ -217,38 +92,66 @@ class Chain(Pattern): return chain_matches - def _match_parent(self, match, yield_parent): + def _to_next_chain_part(self, chain_part, chain_part_matches, raw_chain_part_matches, chain_found, + input_string, chain_input_string, offset, current_chain_matches): + Chain._fix_matches_offset(chain_part_matches, input_string, offset) + Chain._fix_matches_offset(raw_chain_part_matches, input_string, offset) + + if raw_chain_part_matches: + grouped_matches_dict = self._group_by_match_index(chain_part_matches) + grouped_raw_matches_dict = self._group_by_match_index(raw_chain_part_matches) + + for match_index, grouped_raw_matches in grouped_raw_matches_dict.items(): + chain_found = True + offset = grouped_raw_matches[-1].raw_end + chain_input_string = input_string[offset:] + + if not chain_part.is_hidden: + grouped_matches = grouped_matches_dict.get(match_index, []) + if self._chain_breaker_eval(current_chain_matches + grouped_matches): + current_chain_matches.extend(grouped_matches) + return chain_found, chain_input_string, offset + + def _process_match(self, match, match_index, child=False): """ - Handle a parent match + Handle a match :param match: :type match: - :param yield_parent: - :type yield_parent: + :param match_index: + :type match_index: + :param child: + :type child: :return: :rtype: """ - ret = super(Chain, self)._match_parent(match, yield_parent) - original_children = Matches(match.children) - original_end = match.end - while not ret and 
match.children: - last_pattern = match.children[-1].pattern - last_pattern_children = [child for child in match.children if child.pattern == last_pattern] - last_pattern_groups_iter = itertools.groupby(last_pattern_children, lambda child: child.match_index) - last_pattern_groups = {} - for index, matches in last_pattern_groups_iter: - last_pattern_groups[index] = list(matches) + # pylint: disable=too-many-locals + ret = super(Chain, self)._process_match(match, match_index, child=child) + if ret: + return True - for index in reversed(list(last_pattern_groups)): - last_matches = list(last_pattern_groups[index]) - for last_match in last_matches: - match.children.remove(last_match) - match.end = match.children[-1].end if match.children else match.start - ret = super(Chain, self)._match_parent(match, yield_parent) - if ret: - return True - match.children = original_children - match.end = original_end - return ret + if match.children: + last_pattern = match.children[-1].pattern + last_pattern_groups = self._group_by_match_index( + [child_ for child_ in match.children if child_.pattern == last_pattern] + ) + + if last_pattern_groups: + original_children = Matches(match.children) + original_end = match.end + + for index in reversed(list(last_pattern_groups)): + last_matches = last_pattern_groups[index] + for last_match in last_matches: + match.children.remove(last_match) + match.end = match.children[-1].end if match.children else match.start + ret = super(Chain, self)._process_match(match, match_index, child=child) + if ret: + return True + + match.children = original_children + match.end = original_end + + return False def _build_chain_match(self, current_chain_matches, input_string): start = None @@ -282,46 +185,11 @@ class Chain(Pattern): Chain._fix_matches_offset(chain_part_match.children, input_string, offset) @staticmethod - def _match_chain_part(is_chain_start, chain_part, chain_input_string, context): - chain_part_matches, raw_chain_part_matches = chain_part.pattern.matches(chain_input_string, context, - with_raw_matches=True) - chain_part_matches = Chain._truncate_chain_part_matches(is_chain_start, chain_part_matches, chain_part, - chain_input_string) - raw_chain_part_matches = Chain._truncate_chain_part_matches(is_chain_start, raw_chain_part_matches, chain_part, - chain_input_string) - - Chain._validate_chain_part_matches(raw_chain_part_matches, chain_part) - return chain_part_matches, raw_chain_part_matches - - @staticmethod - def _truncate_chain_part_matches(is_chain_start, chain_part_matches, chain_part, chain_input_string): - if not chain_part_matches: - return chain_part_matches - - if not is_chain_start: - separator = chain_input_string[0:chain_part_matches[0].initiator.raw_start] - if separator: - return [] - - j = 1 - for i in range(0, len(chain_part_matches) - 1): - separator = chain_input_string[chain_part_matches[i].initiator.raw_end: - chain_part_matches[i + 1].initiator.raw_start] - if separator: - break - j += 1 - truncated = chain_part_matches[:j] - if chain_part.repeater_end is not None: - truncated = [m for m in truncated if m.match_index < chain_part.repeater_end] - return truncated - - @staticmethod - def _validate_chain_part_matches(chain_part_matches, chain_part): - max_match_index = -1 - if chain_part_matches: - max_match_index = max([m.match_index for m in chain_part_matches]) - if max_match_index + 1 < chain_part.repeater_start: - raise _InvalidChainException + def _group_by_match_index(matches): + grouped_matches_dict = dict() + for match_index, match in 
itertools.groupby(matches, lambda m: m.match_index): + grouped_matches_dict[match_index] = list(match) + return grouped_matches_dict @property def match_options(self): @@ -338,7 +206,7 @@ class Chain(Pattern): return "<%s%s:%s>" % (self.__class__.__name__, defined, self.parts) -class ChainPart(object): +class ChainPart(BasePattern): """ Part of a pattern chain. """ @@ -350,6 +218,51 @@ class ChainPart(object): self.repeater_end = 1 self._hidden = False + @property + def _is_chain_start(self): + return self._chain.parts[0] == self + + def matches(self, input_string, context=None, with_raw_matches=False): + matches, raw_matches = self.pattern.matches(input_string, context=context, with_raw_matches=True) + + matches = self._truncate_repeater(matches, input_string) + raw_matches = self._truncate_repeater(raw_matches, input_string) + + self._validate_repeater(raw_matches) + + if with_raw_matches: + return matches, raw_matches + + return matches + + def _truncate_repeater(self, matches, input_string): + if not matches: + return matches + + if not self._is_chain_start: + separator = input_string[0:matches[0].initiator.raw_start] + if separator: + return [] + + j = 1 + for i in range(0, len(matches) - 1): + separator = input_string[matches[i].initiator.raw_end: + matches[i + 1].initiator.raw_start] + if separator: + break + j += 1 + truncated = matches[:j] + if self.repeater_end is not None: + truncated = [m for m in truncated if m.match_index < self.repeater_end] + return truncated + + def _validate_repeater(self, matches): + max_match_index = -1 + if matches: + max_match_index = max([m.match_index for m in matches]) + if max_match_index + 1 < self.repeater_start: + raise _InvalidChainException + def chain(self): """ Add patterns chain, using configuration from this chain diff --git a/libs/common/rebulk/formatters.py b/libs/common/rebulk/formatters.py index 47046942..7175a54a 100644 --- a/libs/common/rebulk/formatters.py +++ b/libs/common/rebulk/formatters.py @@ -15,9 +15,19 @@ def formatters(*chained_formatters): :return: :rtype: """ + def formatters_chain(input_string): # pylint:disable=missing-docstring for chained_formatter in chained_formatters: input_string = chained_formatter(input_string) return input_string return formatters_chain + + +def default_formatter(input_string): + """ + Default formatter + :param input_string: + :return: + """ + return input_string diff --git a/libs/common/rebulk/introspector.py b/libs/common/rebulk/introspector.py index 64b9836f..bfefcb75 100644 --- a/libs/common/rebulk/introspector.py +++ b/libs/common/rebulk/introspector.py @@ -3,7 +3,7 @@ """ Introspect rebulk object to retrieve capabilities. """ -from abc import ABCMeta, abstractproperty +from abc import ABCMeta, abstractmethod from collections import defaultdict import six @@ -16,7 +16,8 @@ class Description(object): """ Abstract class for a description. """ - @abstractproperty + @property + @abstractmethod def properties(self): # pragma: no cover """ Properties of described object. 
diff --git a/libs/common/rebulk/loose.py b/libs/common/rebulk/loose.py index 427b69a0..423b4ea7 100644 --- a/libs/common/rebulk/loose.py +++ b/libs/common/rebulk/loose.py @@ -4,12 +4,12 @@ Various utility functions """ - import sys -import inspect +from inspect import isclass try: from inspect import getfullargspec as getargspec + _fullargspec_supported = True except ImportError: _fullargspec_supported = False @@ -55,8 +55,8 @@ def call(function, *args, **kwargs): :return: same value as default function call :rtype: object """ - func = constructor_args if inspect.isclass(function) else function_args - call_args, call_kwargs = func(function, *args, **kwargs) + func = constructor_args if isclass(function) else function_args + call_args, call_kwargs = func(function, *args, ignore_unused=True, **kwargs) # @see #20 return function(*call_args, **call_kwargs) @@ -145,6 +145,8 @@ if not _fullargspec_supported: else: call_args = args[:len(argspec.args) - (1 if constructor else 0)] return call_args, call_kwarg + + argspec_args = argspec_args_legacy @@ -215,9 +217,12 @@ def filter_index(collection, predicate=None, index=None): return collection -def set_defaults(defaults, kwargs): +def set_defaults(defaults, kwargs, override=False): """ Set defaults from defaults dict to kwargs dict + + :param override: when True, values from defaults override existing values in kwargs + :type override: bool :param defaults: :type defaults: :param kwargs: @@ -225,12 +230,13 @@ :return: :rtype: """ + if 'clear' in defaults.keys() and defaults.pop('clear'): + kwargs.clear() for key, value in defaults.items(): - if key not in kwargs and value is not None: + if key in kwargs: + if isinstance(value, list) and isinstance(kwargs[key], list): + kwargs[key] = list(value) + kwargs[key] + elif isinstance(value, dict) and isinstance(kwargs[key], dict): + set_defaults(value, kwargs[key]) + if key not in kwargs or override: kwargs[key] = value - elif isinstance(value, list) and isinstance(kwargs[key], list): - kwargs[key] = list(value) + kwargs[key] - elif isinstance(value, dict) and isinstance(kwargs[key], dict): - set_defaults(value, kwargs[key]) - elif key in kwargs and value is None: - kwargs[key] = None diff --git a/libs/common/rebulk/match.py b/libs/common/rebulk/match.py index 8bf41245..d8e72df4 100644 --- a/libs/common/rebulk/match.py +++ b/libs/common/rebulk/match.py @@ -815,6 +815,24 @@ class Match(object): return filter_index(ret, predicate, index) + def tagged(self, *tags): + """ + Check if this match has at least one of the provided tags + + :param tags: + :return: True if this match has at least one of the provided tags, False otherwise. + """ + return any(tag in self.tags for tag in tags) + + def named(self, *names): + """ + Check if one of the child matches has one of the provided names + + :param names: + :return: True if at least one child match has one of the provided names, False otherwise. + """ + return any(name in self.names for name in names) + def __len__(self): return self.end - self.start diff --git a/libs/common/rebulk/pattern.py b/libs/common/rebulk/pattern.py index 57b274e8..beb8b273 100644 --- a/libs/common/rebulk/pattern.py +++ b/libs/common/rebulk/pattern.py @@ -10,14 +10,39 @@ from abc import ABCMeta, abstractmethod, abstractproperty import six from . 
import debug +from .formatters import default_formatter from .loose import call, ensure_list, ensure_dict from .match import Match from .remodule import re, REGEX_AVAILABLE from .utils import find_all, is_iterable, get_first_defined +from .validators import allways_true @six.add_metaclass(ABCMeta) -class Pattern(object): +class BasePattern(object): + """ + Base class for Pattern like objects + """ + + @abstractmethod + def matches(self, input_string, context=None, with_raw_matches=False): + """ + Computes all matches for a given input + + :param input_string: the string to parse + :type input_string: str + :param context: the context + :type context: dict + :param with_raw_matches: should return details + :type with_raw_matches: dict + :return: matches based on input_string for this pattern + :rtype: iterator[Match] + """ + pass + + +@six.add_metaclass(ABCMeta) +class Pattern(BasePattern): """ Definition of a particular pattern to search for. """ @@ -25,7 +50,7 @@ class Pattern(object): def __init__(self, name=None, tags=None, formatter=None, value=None, validator=None, children=False, every=False, private_parent=False, private_children=False, private=False, private_names=None, ignore_names=None, marker=False, format_all=False, validate_all=False, disabled=lambda context: False, log_level=None, - properties=None, post_processor=None, **kwargs): + properties=None, post_processor=None, pre_match_processor=None, post_match_processor=None, **kwargs): """ :param name: Name of this pattern :type name: str @@ -66,15 +91,19 @@ class Pattern(object): :type disabled: bool|function :param log_lvl: Log level associated to this pattern :type log_lvl: int - :param post_process: Post processing function + :param post_processor: Post processing function :type post_processor: func + :param pre_match_processor: Pre match processing function + :type pre_match_processor: func + :param post_match_processor: Post match processing function + :type post_match_processor: func """ # pylint:disable=too-many-locals,unused-argument self.name = name self.tags = ensure_list(tags) - self.formatters, self._default_formatter = ensure_dict(formatter, lambda x: x) + self.formatters, self._default_formatter = ensure_dict(formatter, default_formatter) self.values, self._default_value = ensure_dict(value, None) - self.validators, self._default_validator = ensure_dict(validator, lambda match: True) + self.validators, self._default_validator = ensure_dict(validator, allways_true) self.every = every self.children = children self.private = private @@ -96,6 +125,14 @@ class Pattern(object): self.post_processor = None else: self.post_processor = post_processor + if not callable(pre_match_processor): + self.pre_match_processor = None + else: + self.pre_match_processor = pre_match_processor + if not callable(post_match_processor): + self.post_match_processor = None + else: + self.post_match_processor = post_match_processor @property def log_level(self): @@ -106,83 +143,6 @@ class Pattern(object): """ return self._log_level if self._log_level is not None else debug.LOG_LEVEL - def _yield_children(self, match): - """ - Does this match has children - :param match: - :type match: - :return: - :rtype: - """ - return match.children and (self.children or self.every) - - def _yield_parent(self): - """ - Does this mat - :param match: - :type match: - :return: - :rtype: - """ - return not self.children or self.every - - def _match_parent(self, match, yield_parent): - """ - Handle a parent match - :param match: - :type match: - :param 
yield_parent: - :type yield_parent: - :return: - :rtype: - """ - if not match or match.value == "": - return False - - pattern_value = get_first_defined(self.values, [match.name, '__parent__', None], - self._default_value) - if pattern_value: - match.value = pattern_value - - if yield_parent or self.format_all: - match.formatter = get_first_defined(self.formatters, [match.name, '__parent__', None], - self._default_formatter) - if yield_parent or self.validate_all: - validator = get_first_defined(self.validators, [match.name, '__parent__', None], - self._default_validator) - if validator and not validator(match): - return False - return True - - def _match_child(self, child, yield_children): - """ - Handle a children match - :param child: - :type child: - :param yield_children: - :type yield_children: - :return: - :rtype: - """ - if not child or child.value == "": - return False - - pattern_value = get_first_defined(self.values, [child.name, '__children__', None], - self._default_value) - if pattern_value: - child.value = pattern_value - - if yield_children or self.format_all: - child.formatter = get_first_defined(self.formatters, [child.name, '__children__', None], - self._default_formatter) - - if yield_children or self.validate_all: - validator = get_first_defined(self.validators, [child.name, '__children__', None], - self._default_validator) - if validator and not validator(child): - return False - return True - def matches(self, input_string, context=None, with_raw_matches=False): """ Computes all matches for a given input @@ -200,41 +160,168 @@ matches = [] raw_matches = [] + for pattern in self.patterns: - yield_parent = self._yield_parent() - match_index = -1 + match_index = 0 for match in self._match(pattern, input_string, context): - match_index += 1 - match.match_index = match_index raw_matches.append(match) - yield_children = self._yield_children(match) - if not self._match_parent(match, yield_parent): - continue - validated = True - for child in match.children: - if not self._match_child(child, yield_children): - validated = False - break - if validated: - if self.private_parent: - match.private = True - if self.private_children: - for child in match.children: - child.private = True - if yield_parent or self.private_parent: - matches.append(match) - if yield_children or self.private_children: - for child in match.children: - child.match_index = match_index - matches.append(child) - matches = self._matches_post_process(matches) - self._matches_privatize(matches) - self._matches_ignore(matches) + matches.extend(self._process_matches(match, match_index)) + match_index += 1 + + matches = self._post_process_matches(matches) + if with_raw_matches: return matches, raw_matches return matches + @property + def _should_include_children(self): + """ + Check if child matches from this pattern should be included in matches results. + :param match: + :type match: + :return: + :rtype: + """ + return self.children or self.every + + @property + def _should_include_parent(self): + """ + Check if a match from this pattern should be included in matches results. 
+ :param match: + :type match: + :return: + :rtype: + """ + return not self.children or self.every + + @staticmethod + def _match_config_property_keys(match, child=False): + if match.name: + yield match.name + if child: + yield '__children__' + else: + yield '__parent__' + yield None + + @staticmethod + def _process_match_index(match, match_index): + """ + Process the match index from this pattern's process state. + + :param match: + :return: + """ + match.match_index = match_index + + def _process_match_private(self, match, child=False): + """ + Process match privacy from this pattern configuration. + + :param match: + :param child: + :return: + """ + + if match.name and match.name in self.private_names or \ + not child and self.private_parent or \ + child and self.private_children: + match.private = True + + def _process_match_value(self, match, child=False): + """ + Process match value from this pattern configuration. + :param match: + :return: + """ + keys = self._match_config_property_keys(match, child=child) + pattern_value = get_first_defined(self.values, keys, self._default_value) + if pattern_value: + match.value = pattern_value + + def _process_match_formatter(self, match, child=False): + """ + Process match formatter from this pattern configuration. + + :param match: + :return: + """ + included = self._should_include_children if child else self._should_include_parent + if included or self.format_all: + keys = self._match_config_property_keys(match, child=child) + match.formatter = get_first_defined(self.formatters, keys, self._default_formatter) + + def _process_match_validator(self, match, child=False): + """ + Process match validation from this pattern configuration. + + :param match: + :return: True if match is validated by the configured validator, False otherwise. + """ + included = self._should_include_children if child else self._should_include_parent + if included or self.validate_all: + keys = self._match_config_property_keys(match, child=child) + validator = get_first_defined(self.validators, keys, self._default_validator) + if validator and not validator(match): + return False + return True + + def _process_match(self, match, match_index, child=False): + """ + Process match from this pattern by setting all properties from defined configuration + (index, private, value, formatter, validator, ...). + + :param match: + :type match: + :return: True if match is validated by the configured validator, False otherwise. + :rtype: + """ + self._process_match_index(match, match_index) + self._process_match_private(match, child) + self._process_match_value(match, child) + self._process_match_formatter(match, child) + return self._process_match_validator(match, child) + + @staticmethod + def _process_match_processor(match, processor): + if processor: + ret = processor(match) + if ret is not None: + return ret + return match + + def _process_matches(self, match, match_index): + """ + Process and generate all matches for the given unprocessed match. + :param match: + :param match_index: + :return: Processed and dispatched matches. 
+ """ + match = self._process_match_processor(match, self.pre_match_processor) + if not match: + return + + if not self._process_match(match, match_index): + return + + for child in match.children: + if not self._process_match(child, match_index, child=True): + return + + match = self._process_match_processor(match, self.post_match_processor) + if not match: + return + + if (self._should_include_parent or self.private_parent) and match.name not in self.ignore_names: + yield match + if self._should_include_children or self.private_children: + children = [x for x in match.children if x.name not in self.ignore_names] + for child in children: + yield child + + def _post_process_matches(self, matches): """ Post process matches with user defined function :param matches: @@ -246,32 +333,6 @@ return self.post_processor(matches, self) return matches - def _matches_privatize(self, matches): - """ - Mark matches included in private_names with private flag. - :param matches: - :type matches: - :return: - :rtype: - """ - if self.private_names: - for match in matches: - if match.name in self.private_names: - match.private = True - - def _matches_ignore(self, matches): - """ - Ignore matches included in ignore_names. - :param matches: - :type matches: - :return: - :rtype: - """ - if self.ignore_names: - for match in list(matches): - if match.name in self.ignore_names: - matches.remove(match) - @abstractproperty def patterns(self): # pragma: no cover """ @@ -306,7 +367,7 @@ @abstractmethod def _match(self, pattern, input_string, context=None): # pragma: no cover """ - Computes all matches for a given pattern and input + Computes all unprocessed matches for a given pattern and input. :param pattern: the pattern to use :param input_string: the string to parse @@ -350,7 +411,9 @@ def _match(self, pattern, input_string, context=None): for index in find_all(input_string, pattern, **self._kwargs): - yield Match(index, index + len(pattern), pattern=self, input_string=input_string, **self._match_kwargs) + match = Match(index, index + len(pattern), pattern=self, input_string=input_string, **self._match_kwargs) + if match: + yield match class RePattern(Pattern): @@ -411,15 +474,18 @@ for start, end in match_object.spans(i): child_match = Match(start, end, name=name, parent=main_match, pattern=self, input_string=input_string, **self._children_match_kwargs) - main_match.children.append(child_match) + if child_match: + main_match.children.append(child_match) else: start, end = match_object.span(i) if start > -1 and end > -1: child_match = Match(start, end, name=name, parent=main_match, pattern=self, input_string=input_string, **self._children_match_kwargs) - main_match.children.append(child_match) + if child_match: + main_match.children.append(child_match) - yield main_match + if main_match: + yield main_match class FunctionalPattern(Pattern): @@ -457,14 +523,18 @@ if self._match_kwargs: options = self._match_kwargs.copy() options.update(args) - yield Match(pattern=self, input_string=input_string, **options) + match = Match(pattern=self, input_string=input_string, **options) + if match: + yield match else: kwargs = self._match_kwargs if isinstance(args[-1], dict): kwargs = dict(kwargs) kwargs.update(args[-1]) args = args[:-1] - yield Match(*args, pattern=self, input_string=input_string, **kwargs) + match = Match(*args, pattern=self, input_string=input_string, **kwargs) + if match: + 
diff --git a/libs/common/rebulk/rebulk.py b/libs/common/rebulk/rebulk.py
index 42fb6440..a6a0fd2f 100644
--- a/libs/common/rebulk/rebulk.py
+++ b/libs/common/rebulk/rebulk.py
@@ -5,20 +5,16 @@ Entry point functions and classes for Rebulk
 """
 from logging import getLogger
 
+from .builder import Builder
 from .match import Matches
-
-from .pattern import RePattern, StringPattern, FunctionalPattern
-from .chain import Chain
-
 from .processors import ConflictSolver, PrivateRemover
-from .loose import set_defaults
-from .utils import extend_safe
 from .rules import Rules
+from .utils import extend_safe
 
 log = getLogger(__name__).log
 
 
-class Rebulk(object):
+class Rebulk(Builder):
     r"""
     Regular expression, string and function based patterns are declared in a ``Rebulk`` object. It uses a fluent API to
     chain ``string``, ``regex``, and ``functional`` methods to define various pattern types.
 
@@ -44,6 +40,7 @@ class Rebulk(object):
     >>> bulk.matches("the lakers are from la")
     [<lakers:(4, 10)>, <la:(20, 22)>]
     """
+    # pylint:disable=protected-access
 
     def __init__(self, disabled=lambda context: False, default_rules=True):
@@ -56,6 +53,7 @@ class Rebulk(object):
         :return:
         :rtype:
         """
+        super(Rebulk, self).__init__()
         if not callable(disabled):
             self.disabled = lambda context: disabled
         else:
@@ -64,11 +62,6 @@ class Rebulk(object):
         self._rules = Rules()
         if default_rules:
             self.rules(ConflictSolver, PrivateRemover)
-        self._defaults = {}
-        self._regex_defaults = {}
-        self._string_defaults = {}
-        self._functional_defaults = {}
-        self._chain_defaults = {}
         self._rebulks = []
 
     def pattern(self, *pattern):
@@ -83,172 +76,6 @@ class Rebulk(object):
         self._patterns.extend(pattern)
         return self
 
-    def defaults(self, **kwargs):
-        """
-        Define default keyword arguments for all patterns
-        :param kwargs:
-        :type kwargs:
-        :return:
-        :rtype:
-        """
-        self._defaults = kwargs
-        return self
-
-    def regex_defaults(self, **kwargs):
-        """
-        Define default keyword arguments for functional patterns.
-        :param kwargs:
-        :type kwargs:
-        :return:
-        :rtype:
-        """
-        self._regex_defaults = kwargs
-        return self
-
-    def regex(self, *pattern, **kwargs):
-        """
-        Add re pattern
-
-        :param pattern:
-        :type pattern:
-        :return: self
-        :rtype: Rebulk
-        """
-        self.pattern(self.build_re(*pattern, **kwargs))
-        return self
-
-    def build_re(self, *pattern, **kwargs):
-        """
-        Builds a new regular expression pattern
-
-        :param pattern:
-        :type pattern:
-        :param kwargs:
-        :type kwargs:
-        :return:
-        :rtype:
-        """
-        set_defaults(self._regex_defaults, kwargs)
-        set_defaults(self._defaults, kwargs)
-        return RePattern(*pattern, **kwargs)
-
-    def string_defaults(self, **kwargs):
-        """
-        Define default keyword arguments for string patterns.
-        :param kwargs:
-        :type kwargs:
-        :return:
-        :rtype:
-        """
-        self._string_defaults = kwargs
-        return self
-
-    def string(self, *pattern, **kwargs):
-        """
-        Add string pattern
-
-        :param pattern:
-        :type pattern:
-        :return: self
-        :rtype: Rebulk
-        """
-        self.pattern(self.build_string(*pattern, **kwargs))
-        return self
-
-    def build_string(self, *pattern, **kwargs):
-        """
-        Builds a new string pattern
-
-        :param pattern:
-        :type pattern:
-        :param kwargs:
-        :type kwargs:
-        :return:
-        :rtype:
-        """
-        set_defaults(self._string_defaults, kwargs)
-        set_defaults(self._defaults, kwargs)
-        return StringPattern(*pattern, **kwargs)
-
-    def functional_defaults(self, **kwargs):
-        """
-        Define default keyword arguments for functional patterns.
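Note: the methods removed below (defaults, regex*, string*, functional*, chain*) are not deleted outright; they move to the new Builder base class that Rebulk now inherits from. A quick sketch assuming the fluent API keeps its previous signatures (the tests later in this patch exercise the same calls):

    import re
    from rebulk import Rebulk

    bulk = Rebulk() \
        .regex_defaults(flags=re.IGNORECASE) \
        .string('lakers', ignore_case=True) \
        .regex(r'\bla\b')
    print([m.value for m in bulk.matches("the Lakers are from LA")])
    # ['Lakers', 'LA'] expected
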
-        :param kwargs:
-        :type kwargs:
-        :return:
-        :rtype:
-        """
-        self._functional_defaults = kwargs
-        return self
-
-    def functional(self, *pattern, **kwargs):
-        """
-        Add functional pattern
-
-        :param pattern:
-        :type pattern:
-        :return: self
-        :rtype: Rebulk
-        """
-        self.pattern(self.build_functional(*pattern, **kwargs))
-        return self
-
-    def build_functional(self, *pattern, **kwargs):
-        """
-        Builds a new functional pattern
-
-        :param pattern:
-        :type pattern:
-        :param kwargs:
-        :type kwargs:
-        :return:
-        :rtype:
-        """
-        set_defaults(self._functional_defaults, kwargs)
-        set_defaults(self._defaults, kwargs)
-        return FunctionalPattern(*pattern, **kwargs)
-
-    def chain_defaults(self, **kwargs):
-        """
-        Define default keyword arguments for patterns chain.
-        :param kwargs:
-        :type kwargs:
-        :return:
-        :rtype:
-        """
-        self._chain_defaults = kwargs
-        return self
-
-    def chain(self, **kwargs):
-        """
-        Add patterns chain, using configuration of this rebulk
-
-        :param pattern:
-        :type pattern:
-        :param kwargs:
-        :type kwargs:
-        :return:
-        :rtype:
-        """
-        chain = self.build_chain(**kwargs)
-        self._patterns.append(chain)
-        return chain
-
-    def build_chain(self, **kwargs):
-        """
-        Builds a new patterns chain
-
-        :param pattern:
-        :type pattern:
-        :param kwargs:
-        :type kwargs:
-        :return:
-        :rtype:
-        """
-        set_defaults(self._chain_defaults, kwargs)
-        set_defaults(self._defaults, kwargs)
-        return Chain(self, **kwargs)
-
     def rules(self, *rules):
         """
         Add rules as a module, class or instance.
diff --git a/libs/common/rebulk/test/test_chain.py b/libs/common/rebulk/test/test_chain.py
index 2715abc2..f3995546 100644
--- a/libs/common/rebulk/test/test_chain.py
+++ b/libs/common/rebulk/test/test_chain.py
@@ -2,11 +2,11 @@
 # -*- coding: utf-8 -*-
 # pylint: disable=no-self-use, pointless-statement, missing-docstring, no-member, len-as-condition
 import re
-
 from functools import partial
 
+from rebulk.pattern import FunctionalPattern, StringPattern, RePattern
+from ..rebulk import Rebulk
 from ..validators import chars_surround
-from ..rebulk import Rebulk, FunctionalPattern, RePattern, StringPattern
 
 
 def test_chain_close():
@@ -63,18 +63,61 @@ def test_build_chain():
 
 def test_chain_defaults():
     rebulk = Rebulk()
-    rebulk.defaults(validator=lambda x: True, ignore_names=['testIgnore'], children=True)
+    rebulk.defaults(validator=lambda x: x.value.startswith('t'), ignore_names=['testIgnore'], children=True)
 
-    rebulk.chain()\
+    rebulk.chain() \
         .regex("(?P<test>test)") \
         .regex(" ").repeater("*") \
+        .regex("(?P<best>best)") \
+        .regex(" ").repeater("*") \
         .regex("(?P<testIgnore>testIgnore)")
 
-    matches = rebulk.matches("test testIgnore")
+    matches = rebulk.matches("test best testIgnore")
 
     assert len(matches) == 1
     assert matches[0].name == "test"
 
 
+def test_chain_with_validators():
+    def chain_validator(match):
+        return match.value.startswith('t') and match.value.endswith('t')
+
+    def default_validator(match):
+        return match.value.startswith('t') and match.value.endswith('g')
+
+    def custom_validator(match):
+        return match.value.startswith('b') and match.value.endswith('t')
+
+    rebulk = Rebulk()
+    rebulk.defaults(children=True, validator=default_validator)
+
+    rebulk.chain(validate_all=True, validator={'__parent__': chain_validator}) \
+        .regex("(?P<test>testing)", validator=default_validator).repeater("+") \
+        .regex(" ").repeater("+") \
+        .regex("(?P<best>best)", validator=custom_validator).repeater("+")
+
+    matches = rebulk.matches("some testing best end")
+
+    assert len(matches) == 2
+    assert matches[0].name == "test"
+    assert matches[1].name == "best"
+
+
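Note: the '__parent__' key in the validator dict above is resolved by the _match_config_property_keys helper added to pattern.py earlier in this patch: per-property configuration is looked up by match name first, then '__children__' or '__parent__', then None as a catch-all. An illustrative sketch of that lookup rule (config_for is hypothetical, not the library code):

    def config_for(config, match_name, child=False):
        # Mirrors the lookup order of _match_config_property_keys.
        for key in (match_name, '__children__' if child else '__parent__', None):
            if key in config:
                return config[key]
        return None

    validator = {'__parent__': lambda m: True}
    assert config_for(validator, 'episode') is validator['__parent__']
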
+def test_matches_docs():
+    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE) \
+        .defaults(children=True, formatter={'episode': int, 'version': int}) \
+        .chain() \
+        .regex(r'e(?P<episode>\d{1,4})').repeater(1) \
+        .regex(r'v(?P<version>\d+)').repeater('?') \
+        .regex(r'[ex-](?P<episode>\d{1,4})').repeater('*') \
+        .close()  # .repeater(1) could be omitted as it's the default behavior
+
+    result = rebulk.matches("This is E14v2-15-16-17").to_dict()  # converts matches to dict
+
+    assert 'episode' in result
+    assert result['episode'] == [14, 15, 16, 17]
+    assert 'version' in result
+    assert result['version'] == 2
+
+
 def test_matches():
     rebulk = Rebulk()
 
@@ -144,8 +187,8 @@ def test_matches():
 def test_matches_2():
     rebulk = Rebulk() \
         .regex_defaults(flags=re.IGNORECASE) \
-        .chain(children=True, formatter={'episode': int}) \
-        .defaults(formatter={'version': int}) \
+        .defaults(children=True, formatter={'episode': int, 'version': int}) \
+        .chain() \
         .regex(r'e(?P<episode>\d{1,4})') \
         .regex(r'v(?P<version>\d+)').repeater('?') \
         .regex(r'[ex-](?P<episode>\d{1,4})').repeater('*') \
@@ -173,25 +216,32 @@ def test_matches_2():
 def test_matches_3():
     alt_dash = (r'@', r'[\W_]')  # abbreviation
 
-    rebulk = Rebulk()
+    match_names = ['season', 'episode']
+    other_names = ['screen_size', 'video_codec', 'audio_codec', 'audio_channels', 'container', 'date']
 
-    rebulk.chain(formatter={'season': int, 'episode': int},
-                 tags=['SxxExx'],
-                 abbreviations=[alt_dash],
-                 private_names=['episodeSeparator', 'seasonSeparator'],
-                 children=True,
-                 private_parent=True,
-                 conflict_solver=lambda match, other: match
-                 if match.name in ['season', 'episode'] and other.name in
-                 ['screen_size', 'video_codec', 'audio_codec',
-                  'audio_channels', 'container', 'date']
-                 else '__default__') \
+    rebulk = Rebulk()
+    rebulk.defaults(formatter={'season': int, 'episode': int},
+                    tags=['SxxExx'],
+                    abbreviations=[alt_dash],
+                    private_names=['episodeSeparator', 'seasonSeparator'],
+                    children=True,
+                    private_parent=True,
+                    conflict_solver=lambda match, other: match
+                    if match.name in match_names and other.name in other_names
+                    else '__default__')
+
+    rebulk.chain() \
+        .defaults(children=True, private_parent=True) \
         .regex(r'(?P<season>\d+)@?x@?(?P<episode>\d+)') \
         .regex(r'(?P<episodeSeparator>x|-|\+|&)(?P<episode>\d+)').repeater('*') \
+        .close() \
         .chain() \
+        .defaults(children=True, private_parent=True) \
         .regex(r'S(?P<season>\d+)@?(?:xE|Ex|E|x)@?(?P<episode>\d+)') \
         .regex(r'(?:(?P<episodeSeparator>xE|Ex|E|x|-|\+|&)(?P<episode>\d+))').repeater('*') \
+        .close() \
         .chain() \
+        .defaults(children=True, private_parent=True) \
         .regex(r'S(?P<season>\d+)') \
         .regex(r'(?P<seasonSeparator>S|-|\+|&)(?P<season>\d+)').repeater('*')
 
@@ -240,11 +290,11 @@ def test_matches_4():
     rebulk = Rebulk()
 
     rebulk.regex_defaults(flags=re.IGNORECASE)
-    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
-                    validator={'__parent__': seps_surround}, children=True, private_parent=True)
+    rebulk.defaults(validate_all=True, children=True)
+    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], private_parent=True)
 
-    rebulk.chain(formatter={'episode': int, 'version': int}) \
-        .defaults(validator=None) \
+    rebulk.chain(validator={'__parent__': seps_surround}, formatter={'episode': int, 'version': int}) \
+        .defaults(formatter={'episode': int, 'version': int}) \
         .regex(r'e(?P<episode>\d{1,4})') \
         .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')
@@ -262,11 +312,11 @@ def test_matches_5():
     rebulk = Rebulk()
 
     rebulk.regex_defaults(flags=re.IGNORECASE)
-    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
-                    validator={'__parent__': seps_surround}, children=True, private_parent=True)
 
-    rebulk.chain(formatter={'episode': int, 'version': int}) \
-        .defaults(validator=None) \
+    rebulk.chain(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
+                 validator={'__parent__': seps_surround}, children=True, private_parent=True,
+                 formatter={'episode': int, 'version': int}) \
+        .defaults(children=True, private_parent=True) \
         .regex(r'e(?P<episode>\d{1,4})') \
         .regex(r'v(?P<version>\d+)').repeater('?') \
         .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('{2,3}')
@@ -288,7 +338,7 @@ def test_matches_6():
                     validator=None, children=True, private_parent=True)
 
     rebulk.chain(formatter={'episode': int, 'version': int}) \
-        .defaults(validator=None) \
+        .defaults(children=True, private_parent=True) \
         .regex(r'e(?P<episode>\d{1,4})') \
         .regex(r'v(?P<version>\d+)').repeater('?') \
         .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('{2,3}')
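Note: the recurring edit in these tests moves shared options from chain(...) keyword arguments to defaults(...). A hedged reading of the tests, not documented API: rebulk.defaults(...) applies to every pattern subsequently built on that Rebulk instance, while chain().defaults(...) applies only to the parts of that one chain. A minimal sketch:

    import re
    from rebulk import Rebulk

    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(children=True, formatter={'episode': int})

    rebulk.chain() \
        .regex(r'e(?P<episode>\d{1,4})') \
        .regex(r'[x-](?P<episode>\d{1,4})').repeater('*')

    print(rebulk.matches("E01x02").to_dict())  # {'episode': [1, 2]} expected
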
diff --git a/libs/common/rebulk/test/test_debug.py b/libs/common/rebulk/test/test_debug.py
index cd9e556d..8abdac5f 100644
--- a/libs/common/rebulk/test/test_debug.py
+++ b/libs/common/rebulk/test/test_debug.py
@@ -2,19 +2,15 @@
 # -*- coding: utf-8 -*-
 # pylint: disable=no-self-use, pointless-statement, missing-docstring, protected-access, invalid-name, len-as-condition
 
+from .default_rules_module import RuleRemove0
+from .. import debug
+from ..match import Match
 from ..pattern import StringPattern
 from ..rebulk import Rebulk
-from ..match import Match
-from .. import debug
-from .default_rules_module import RuleRemove0
 
 
 class TestDebug(object):
-
-
-    #request.addfinalizer(disable_debug)
-
-
+    # request.addfinalizer(disable_debug)
     debug.DEBUG = True
     pattern = StringPattern(1, 3, value="es")
 
@@ -38,43 +34,43 @@ class TestDebug(object):
     debug.DEBUG = False
 
     def test_pattern(self):
-        assert self.pattern.defined_at.lineno == 20
+        assert self.pattern.defined_at.lineno > 0
         assert self.pattern.defined_at.name == 'rebulk.test.test_debug'
         assert self.pattern.defined_at.filename.endswith('test_debug.py')
 
-        assert str(self.pattern.defined_at) == 'test_debug.py#L20'
-        assert repr(self.pattern) == '<StringPattern@test_debug.py#L20:(1, 3)>'
+        assert str(self.pattern.defined_at).startswith('test_debug.py#L')
+        assert repr(self.pattern).startswith('<StringPattern@test_debug.py#L')
 
     def test_match(self):
-        assert self.match.defined_at.lineno == 22
+        assert self.match.defined_at.lineno > 0
         assert self.match.defined_at.name == 'rebulk.test.test_debug'
         assert self.match.defined_at.filename.endswith('test_debug.py')
 
-        assert str(self.match.defined_at) == 'test_debug.py#L22'
+        assert str(self.match.defined_at).startswith('test_debug.py#L')
 
     def test_rule(self):
-        assert self.rule.defined_at.lineno == 23
+        assert self.rule.defined_at.lineno > 0
         assert self.rule.defined_at.name == 'rebulk.test.test_debug'
         assert self.rule.defined_at.filename.endswith('test_debug.py')
 
-        assert str(self.rule.defined_at) == 'test_debug.py#L23'
-        assert repr(self.rule) == '<RuleRemove0@test_debug.py#L23>'
+        assert str(self.rule.defined_at).startswith('test_debug.py#L')
+        assert repr(self.rule).startswith('<RuleRemove0@test_debug.py#L')
 
     def test_rebulk(self):
-        assert self.rebulk._patterns[0].defined_at.lineno in [26, 27]
+        assert self.rebulk._patterns[0].defined_at.lineno > 0
         assert self.rebulk._patterns[0].defined_at.name == 'rebulk.test.test_debug'
         assert self.rebulk._patterns[0].defined_at.filename.endswith('test_debug.py')
 
-        assert str(self.rebulk._patterns[0].defined_at) in ['test_debug.py#L26', 'test_debug.py#L27']
+        assert str(self.rebulk._patterns[0].defined_at).startswith('test_debug.py#L')
 
-        assert self.rebulk._patterns[1].defined_at.lineno in [27, 28]
+        assert self.rebulk._patterns[1].defined_at.lineno > 0
         assert self.rebulk._patterns[1].defined_at.name == 'rebulk.test.test_debug'
         assert self.rebulk._patterns[1].defined_at.filename.endswith('test_debug.py')
 
-        assert str(self.rebulk._patterns[1].defined_at) in ['test_debug.py#L27', 'test_debug.py#L28']
+        assert str(self.rebulk._patterns[1].defined_at).startswith('test_debug.py#L')
 
         assert self.matches[0].defined_at == self.rebulk._patterns[0].defined_at
         assert self.matches[1].defined_at == self.rebulk._patterns[1].defined_at
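Note: the rewritten assertions stop pinning exact line numbers because defined_at records whatever line the pattern happens to be declared on, so any edit to the test file used to break them. A small sketch of the facility under test (file name and line number are illustrative):

    from rebulk import debug
    from rebulk.pattern import StringPattern

    debug.DEBUG = True               # record declaration call sites
    pattern = StringPattern("es")
    print(pattern.defined_at)        # e.g. my_script.py#L6, varies with the file
    debug.DEBUG = False              # patterns created now would get defined_at = None
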
diff --git a/libs/common/rebulk/test/test_match.py b/libs/common/rebulk/test/test_match.py
index 87273d54..8750733a 100644
--- a/libs/common/rebulk/test/test_match.py
+++ b/libs/common/rebulk/test/test_match.py
@@ -116,6 +116,9 @@ class TestMatchesClass(object):
         assert "tag1" in matches.tags
         assert "tag2" in matches.tags
 
+        assert self.match3.tagged("tag1")
+        assert not self.match3.tagged("start")
+
         tag1 = matches.tagged("tag1")
         assert len(tag1) == 2
         assert tag1[0] == self.match2
diff --git a/libs/common/rebulk/validators.py b/libs/common/rebulk/validators.py
index 5fd3dcb6..b8959c54 100644
--- a/libs/common/rebulk/validators.py
+++ b/libs/common/rebulk/validators.py
@@ -62,9 +62,20 @@ def validators(*chained_validators):
     :return:
     :rtype:
     """
+
     def validator_chain(match):  # pylint:disable=missing-docstring
         for chained_validator in chained_validators:
             if not chained_validator(match):
                 return False
         return True
+
     return validator_chain
+
+
+def allways_true(match):  # pylint:disable=unused-argument
+    """
+    A validator which is always true.
+    :param match:
+    :return:
+    """
+    return True
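Note: a short usage sketch (not from the patch) of the validators() combinator shown above; the returned chain short-circuits on the first validator that rejects the match. The misspelled allways_true identifier is kept as-is above since it is the actual name the module exports; seps_surround and starts_lower below are illustrative helpers.

    from functools import partial
    from rebulk.validators import chars_surround, validators

    seps_surround = partial(chars_surround, ' .-')   # match must be surrounded by separators

    def starts_lower(match):
        return match.value[:1].islower()

    combined = validators(seps_surround, starts_lower)
    # combined(match) is True only when every chained validator accepts the match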