Update vendored guessit to 3.1.1

Updates python-dateutil to 2.8.2
Updates rebulk to 2.0.1
Brian Sheldon 2022-11-28 19:44:46 -05:00
commit 685214af26
66 changed files with 2995 additions and 1306 deletions

View file

@@ -1,4 +1,5 @@
 # coding: utf-8
 # file generated by setuptools_scm
 # don't change, don't track in version control
-version = '2.7.5'
+version = '2.8.2'
+version_tuple = (2, 8, 2)

View file

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-This module offers a generic easter computing method for any given year, using
+This module offers a generic Easter computing method for any given year, using
 Western, Orthodox or Julian algorithms.
 """
@@ -21,14 +21,14 @@ def easter(year, method=EASTER_WESTERN):
     quoted in "Explanatory Supplement to the Astronomical
     Almanac", P. Kenneth Seidelmann, editor.

-    This algorithm implements three different easter
+    This algorithm implements three different Easter
     calculation methods:

-    1 - Original calculation in Julian calendar, valid in
+    1. Original calculation in Julian calendar, valid in
        dates after 326 AD
-    2 - Original method, with date converted to Gregorian
+    2. Original method, with date converted to Gregorian
        calendar, valid in years 1583 to 4099
-    3 - Revised method, in Gregorian calendar, valid in
+    3. Revised method, in Gregorian calendar, valid in
        years 1583 to 4099 as well

     These methods are represented by the constants:

View file

@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-from ._parser import parse, parser, parserinfo
+from ._parser import parse, parser, parserinfo, ParserError
 from ._parser import DEFAULTPARSER, DEFAULTTZPARSER
 from ._parser import UnknownTimezoneWarning
@@ -9,6 +9,7 @@ from .isoparser import isoparser, isoparse
 __all__ = ['parse', 'parser', 'parserinfo',
            'isoparse', 'isoparser',
+           'ParserError',
            'UnknownTimezoneWarning']

View file

@@ -20,11 +20,11 @@ value falls back to the end of the month.
 Additional resources about date/time string formats can be found below:

 - `A summary of the international standard date and time notation
-  <http://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_
-- `W3C Date and Time Formats <http://www.w3.org/TR/NOTE-datetime>`_
+  <https://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_
+- `W3C Date and Time Formats <https://www.w3.org/TR/NOTE-datetime>`_
 - `Time Formats (Planetary Rings Node) <https://pds-rings.seti.org:443/tools/time_formats.html>`_
 - `CPAN ParseDate module
-  <http://search.cpan.org/~muir/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_
+  <https://metacpan.org/pod/release/MUIR/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_
 - `Java SimpleDateFormat Class
   <https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_
 """
@@ -40,7 +40,7 @@ from calendar import monthrange
 from io import StringIO

 import six
-from six import binary_type, integer_types, text_type
+from six import integer_types, text_type

 from decimal import Decimal
@@ -49,7 +49,7 @@ from warnings import warn
 from .. import relativedelta
 from .. import tz

-__all__ = ["parse", "parserinfo"]
+__all__ = ["parse", "parserinfo", "ParserError"]


 # TODO: pandas.core.tools.datetimes imports this explicitly. Might be worth
@@ -60,13 +60,7 @@ class _timelex(object):
     _split_decimal = re.compile("([.,])")

     def __init__(self, instream):
-        if six.PY2:
-            # In Python 2, we can't duck type properly because unicode has
-            # a 'decode' function, and we'd be double-decoding
-            if isinstance(instream, (binary_type, bytearray)):
-                instream = instream.decode()
-        else:
-            if getattr(instream, 'decode', None) is not None:
-                instream = instream.decode()
+        if isinstance(instream, (bytes, bytearray)):
+            instream = instream.decode()

         if isinstance(instream, text_type):
@@ -291,7 +285,7 @@ class parserinfo(object):
             ("s", "second", "seconds")]
     AMPM = [("am", "a"),
             ("pm", "p")]
-    UTCZONE = ["UTC", "GMT", "Z"]
+    UTCZONE = ["UTC", "GMT", "Z", "z"]
     PERTAIN = ["of"]
     TZOFFSET = {}
     # TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate",
@@ -388,7 +382,8 @@ class parserinfo(object):
         if res.year is not None:
             res.year = self.convertyear(res.year, res.century_specified)

-        if res.tzoffset == 0 and not res.tzname or res.tzname == 'Z':
+        if ((res.tzoffset == 0 and not res.tzname) or
+                (res.tzname == 'Z' or res.tzname == 'z')):
             res.tzname = "UTC"
             res.tzoffset = 0
         elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
@@ -422,7 +417,7 @@ class _ymd(list):
         elif not self.has_month:
             return 1 <= value <= 31
         elif not self.has_year:
-            # Be permissive, assume leapyear
+            # Be permissive, assume leap year
             month = self[self.mstridx]
             return 1 <= value <= monthrange(2000, month)[1]
         else:
@@ -538,7 +533,7 @@ class _ymd(list):
                     year, month, day = self
                 else:
                     # 01-Jan-01
-                    # Give precendence to day-first, since
+                    # Give precedence to day-first, since
                     # two-digit years is usually hand-written.
                     day, month, year = self
@@ -625,7 +620,7 @@ class parser(object):
            first element being a :class:`datetime.datetime` object, the second
            a tuple containing the fuzzy tokens.

-        :raises ValueError:
+        :raises ParserError:
            Raised for invalid or unknown string format, if the provided
            :class:`tzinfo` is not in a valid format, or if an invalid date
            would be created.
@@ -645,12 +640,15 @@ class parser(object):
         res, skipped_tokens = self._parse(timestr, **kwargs)

         if res is None:
-            raise ValueError("Unknown string format:", timestr)
+            raise ParserError("Unknown string format: %s", timestr)

         if len(res) == 0:
-            raise ValueError("String does not contain a date:", timestr)
+            raise ParserError("String does not contain a date: %s", timestr)

-        ret = self._build_naive(res, default)
+        try:
+            ret = self._build_naive(res, default)
+        except ValueError as e:
+            six.raise_from(ParserError(str(e) + ": %s", timestr), e)

         if not ignoretz:
             ret = self._build_tzaware(ret, res, tzinfos)
@@ -1021,7 +1019,7 @@ class parser(object):
             hms_idx = idx + 2

         elif idx > 0 and info.hms(tokens[idx-1]) is not None:
-            # There is a "h", "m", or "s" preceeding this token. Since neither
+            # There is a "h", "m", or "s" preceding this token. Since neither
             # of the previous cases was hit, there is no label following this
             # token, so we use the previous label.
             # e.g. the "04" in "12h04"
@@ -1060,7 +1058,8 @@ class parser(object):
                 tzname is None and
                 tzoffset is None and
                 len(token) <= 5 and
-                all(x in string.ascii_uppercase for x in token))
+                (all(x in string.ascii_uppercase for x in token)
+                 or token in self.info.UTCZONE))

     def _ampm_valid(self, hour, ampm, fuzzy):
         """
@@ -1100,7 +1099,7 @@ class parser(object):
     def _parse_min_sec(self, value):
         # TODO: Every usage of this function sets res.second to the return
         # value. Are there any cases where second will be returned as None and
-        # we *dont* want to set res.second = None?
+        # we *don't* want to set res.second = None?
         minute = int(value)
         second = None
@@ -1109,14 +1108,6 @@ class parser(object):
             second = int(60 * sec_remainder)
         return (minute, second)

-    def _parsems(self, value):
-        """Parse a I[.F] seconds value into (seconds, microseconds)."""
-        if "." not in value:
-            return int(value), 0
-        else:
-            i, f = value.split(".")
-            return int(i), int(f.ljust(6, "0")[:6])
-
     def _parse_hms(self, idx, tokens, info, hms_idx):
         # TODO: Is this going to admit a lot of false-positives for when we
         # just happen to have digits and "h", "m" or "s" characters in non-date
@@ -1135,21 +1126,35 @@ class parser(object):
         return (new_idx, hms)

-    def _recombine_skipped(self, tokens, skipped_idxs):
-        """
-        >>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"]
-        >>> skipped_idxs = [0, 1, 2, 5]
-        >>> _recombine_skipped(tokens, skipped_idxs)
-        ["foo bar", "baz"]
-        """
-        skipped_tokens = []
-        for i, idx in enumerate(sorted(skipped_idxs)):
-            if i > 0 and idx - 1 == skipped_idxs[i - 1]:
-                skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx]
-            else:
-                skipped_tokens.append(tokens[idx])
-
-        return skipped_tokens
+    # ------------------------------------------------------------------
+    # Handling for individual tokens. These are kept as methods instead
+    # of functions for the sake of customizability via subclassing.
+
+    def _parsems(self, value):
+        """Parse a I[.F] seconds value into (seconds, microseconds)."""
+        if "." not in value:
+            return int(value), 0
+        else:
+            i, f = value.split(".")
+            return int(i), int(f.ljust(6, "0")[:6])
+
+    def _to_decimal(self, val):
+        try:
+            decimal_value = Decimal(val)
+            # See GH 662, edge case, infinite value should not be converted
+            # via `_to_decimal`
+            if not decimal_value.is_finite():
+                raise ValueError("Converted decimal value is infinite or NaN")
+        except Exception as e:
+            msg = "Could not convert %s to decimal" % val
+            six.raise_from(ValueError(msg), e)
+        else:
+            return decimal_value
+
+    # ------------------------------------------------------------------
+    # Post-Parsing construction of datetime output. These are kept as
+    # methods instead of functions for the sake of customizability via
+    # subclassing.

     def _build_tzinfo(self, tzinfos, tzname, tzoffset):
         if callable(tzinfos):
@@ -1164,6 +1169,9 @@ class parser(object):
             tzinfo = tz.tzstr(tzdata)
         elif isinstance(tzdata, integer_types):
             tzinfo = tz.tzoffset(tzname, tzdata)
+        else:
+            raise TypeError("Offset must be tzinfo subclass, tz string, "
+                            "or int offset.")
         return tzinfo

     def _build_tzaware(self, naive, res, tzinfos):
@@ -1181,10 +1189,10 @@ class parser(object):
             # This is mostly relevant for winter GMT zones parsed in the UK
             if (aware.tzname() != res.tzname and
                     res.tzname in self.info.UTCZONE):
-                aware = aware.replace(tzinfo=tz.tzutc())
+                aware = aware.replace(tzinfo=tz.UTC)

         elif res.tzoffset == 0:
-            aware = naive.replace(tzinfo=tz.tzutc())
+            aware = naive.replace(tzinfo=tz.UTC)

         elif res.tzoffset:
             aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset))
@@ -1239,17 +1247,21 @@ class parser(object):
         return dt

-    def _to_decimal(self, val):
-        try:
-            decimal_value = Decimal(val)
-            # See GH 662, edge case, infinite value should not be converted via `_to_decimal`
-            if not decimal_value.is_finite():
-                raise ValueError("Converted decimal value is infinite or NaN")
-        except Exception as e:
-            msg = "Could not convert %s to decimal" % val
-            six.raise_from(ValueError(msg), e)
-        else:
-            return decimal_value
+    def _recombine_skipped(self, tokens, skipped_idxs):
+        """
+        >>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"]
+        >>> skipped_idxs = [0, 1, 2, 5]
+        >>> _recombine_skipped(tokens, skipped_idxs)
+        ["foo bar", "baz"]
+        """
+        skipped_tokens = []
+        for i, idx in enumerate(sorted(skipped_idxs)):
+            if i > 0 and idx - 1 == skipped_idxs[i - 1]:
+                skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx]
+            else:
+                skipped_tokens.append(tokens[idx])
+
+        return skipped_tokens


 DEFAULTPARSER = parser()
@@ -1341,10 +1353,10 @@ def parse(timestr, parserinfo=None, **kwargs):
         first element being a :class:`datetime.datetime` object, the second
         a tuple containing the fuzzy tokens.

-    :raises ValueError:
-        Raised for invalid or unknown string format, if the provided
-        :class:`tzinfo` is not in a valid format, or if an invalid date
-        would be created.
+    :raises ParserError:
+        Raised for invalid or unknown string formats, if the provided
+        :class:`tzinfo` is not in a valid format, or if an invalid date would
+        be created.

     :raises OverflowError:
         Raised if the parsed date exceeds the largest valid C integer on
@@ -1573,6 +1585,29 @@ DEFAULTTZPARSER = _tzparser()
 def _parsetz(tzstr):
     return DEFAULTTZPARSER.parse(tzstr)


+class ParserError(ValueError):
+    """Exception subclass used for any failure to parse a datetime string.
+
+    This is a subclass of :py:exc:`ValueError`, and should be raised any time
+    earlier versions of ``dateutil`` would have raised ``ValueError``.
+
+    .. versionadded:: 2.8.1
+    """
+    def __str__(self):
+        try:
+            return self.args[0] % self.args[1:]
+        except (TypeError, IndexError):
+            return super(ParserError, self).__str__()
+
+    def __repr__(self):
+        args = ", ".join("'%s'" % arg for arg in self.args)
+        return "%s(%s)" % (self.__class__.__name__, args)
+
+
 class UnknownTimezoneWarning(RuntimeWarning):
-    """Raised when the parser finds a timezone it cannot parse into a tzinfo"""
+    """Raised when the parser finds a timezone it cannot parse into a tzinfo.
+
+    .. versionadded:: 2.7.0
+    """
 # vim:ts=4:sw=4:et
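
Note (not part of the diff): a minimal sketch of how the new ``ParserError`` surfaces to callers, assuming the vendored python-dateutil 2.8.2:

    from dateutil.parser import parse, ParserError

    try:
        parse("not a date")
    except ParserError as exc:
        # ParserError subclasses ValueError, so pre-2.8.1 except clauses keep working
        print(exc)        # Unknown string format: not a date
        print(repr(exc))  # ParserError('Unknown string format: %s', 'not a date')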

View file

@@ -88,10 +88,12 @@ class isoparser(object):
             - ``hh``
             - ``hh:mm`` or ``hhmm``
             - ``hh:mm:ss`` or ``hhmmss``
-            - ``hh:mm:ss.sss`` or ``hh:mm:ss.ssssss`` (3-6 sub-second digits)
+            - ``hh:mm:ss.ssssss`` (Up to 6 sub-second digits)

         Midnight is a special case for `hh`, as the standard supports both
-        00:00 and 24:00 as a representation.
+        00:00 and 24:00 as a representation. The decimal separator can be
+        either a dot or a comma.

         .. caution::
@@ -137,6 +139,10 @@ class isoparser(object):
             else:
                 raise ValueError('String contains unknown ISO components')

+        if len(components) > 3 and components[3] == 24:
+            components[3] = 0
+            return datetime(*components) + timedelta(days=1)
+
         return datetime(*components)

     @_takes_ascii
@@ -153,7 +159,7 @@ class isoparser(object):
         components, pos = self._parse_isodate(datestr)
         if pos < len(datestr):
             raise ValueError('String contains unknown ISO ' +
-                             'components: {}'.format(datestr))
+                             'components: {!r}'.format(datestr.decode('ascii')))
         return date(*components)

     @_takes_ascii
@@ -167,7 +173,10 @@ class isoparser(object):
         :return:
             Returns a :class:`datetime.time` object
         """
-        return time(*self._parse_isotime(timestr))
+        components = self._parse_isotime(timestr)
+        if components[0] == 24:
+            components[0] = 0
+        return time(*components)

     @_takes_ascii
     def parse_tzstr(self, tzstr, zero_as_utc=True):
@@ -190,10 +199,9 @@ class isoparser(object):
         return self._parse_tzstr(tzstr, zero_as_utc=zero_as_utc)

     # Constants
-    _MICROSECOND_END_REGEX = re.compile(b'[-+Z]+')
     _DATE_SEP = b'-'
     _TIME_SEP = b':'
-    _MICRO_SEP = b'.'
+    _FRACTION_REGEX = re.compile(b'[\\.,]([0-9]+)')

     def _parse_isodate(self, dt_str):
         try:
@@ -325,39 +333,42 @@ class isoparser(object):
         pos = 0
         comp = -1

-        if len(timestr) < 2:
+        if len_str < 2:
             raise ValueError('ISO time too short')

-        has_sep = len_str >= 3 and timestr[2:3] == self._TIME_SEP
+        has_sep = False

         while pos < len_str and comp < 5:
             comp += 1

-            if timestr[pos:pos + 1] in b'-+Z':
+            if timestr[pos:pos + 1] in b'-+Zz':
                 # Detect time zone boundary
                 components[-1] = self._parse_tzstr(timestr[pos:])
                 pos = len_str
                 break

+            if comp == 1 and timestr[pos:pos+1] == self._TIME_SEP:
+                has_sep = True
+                pos += 1
+            elif comp == 2 and has_sep:
+                if timestr[pos:pos+1] != self._TIME_SEP:
+                    raise ValueError('Inconsistent use of colon separator')
+                pos += 1
+
             if comp < 3:
                 # Hour, minute, second
                 components[comp] = int(timestr[pos:pos + 2])
                 pos += 2
-                if (has_sep and pos < len_str and
-                        timestr[pos:pos + 1] == self._TIME_SEP):
-                    pos += 1

             if comp == 3:
-                # Microsecond
-                if timestr[pos:pos + 1] != self._MICRO_SEP:
+                # Fraction of a second
+                frac = self._FRACTION_REGEX.match(timestr[pos:])
+                if not frac:
                     continue

-                pos += 1
-                us_str = self._MICROSECOND_END_REGEX.split(timestr[pos:pos + 6],
-                                                           1)[0]
+                us_str = frac.group(1)[:6]  # Truncate to microseconds

                 components[comp] = int(us_str) * 10**(6 - len(us_str))
-                pos += len(us_str)
+                pos += len(frac.group())

         if pos < len_str:
             raise ValueError('Unused components in ISO string')
@@ -366,13 +377,12 @@ class isoparser(object):
             # Standard supports 00:00 and 24:00 as representations of midnight
             if any(component != 0 for component in components[1:4]):
                 raise ValueError('Hour may only be 24 at 24:00:00.000')
-            components[0] = 0

         return components

     def _parse_tzstr(self, tzstr, zero_as_utc=True):
-        if tzstr == b'Z':
-            return tz.tzutc()
+        if tzstr == b'Z' or tzstr == b'z':
+            return tz.UTC

         if len(tzstr) not in {3, 5, 6}:
             raise ValueError('Time zone offset must be 1, 3, 5 or 6 characters')
@@ -391,7 +401,7 @@ class isoparser(object):
             minutes = int(tzstr[(4 if tzstr[3:4] == self._TIME_SEP else 3):])

         if zero_as_utc and hours == 0 and minutes == 0:
-            return tz.tzutc()
+            return tz.UTC
         else:
             if minutes > 59:
                 raise ValueError('Invalid minutes in time zone offset')
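
Note (not part of the diff): a small sketch of the new 24:00 handling shown above, assuming the vendored dateutil 2.8.2:

    from dateutil.parser import isoparse

    # 24:00 is accepted as midnight and normalized to 00:00 of the next day
    isoparse("2018-04-30T24:00:00")
    # datetime.datetime(2018, 5, 1, 0, 0)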

View file

@@ -17,8 +17,12 @@ __all__ = ["relativedelta", "MO", "TU", "WE", "TH", "FR", "SA", "SU"]

 class relativedelta(object):
     """
-    The relativedelta type is based on the specification of the excellent
-    work done by M.-A. Lemburg in his
+    The relativedelta type is designed to be applied to an existing datetime and
+    can replace specific components of that datetime, or represents an interval
+    of time.
+
+    It is based on the specification of the excellent work done by M.-A. Lemburg
+    in his
     `mx.DateTime <https://www.egenix.com/products/python/mxBase/mxDateTime/>`_ extension.
     However, notice that this type does *NOT* implement the same algorithm as
     his work. Do *NOT* expect it to behave like mx.DateTime's counterpart.
@@ -41,17 +45,19 @@ class relativedelta(object):
     years, months, weeks, days, hours, minutes, seconds, microseconds:
         Relative information, may be negative (argument is plural); adding
         or subtracting a relativedelta with relative information performs
-        the corresponding aritmetic operation on the original datetime value
+        the corresponding arithmetic operation on the original datetime value
         with the information in the relativedelta.

     weekday:
-        One of the weekday instances (MO, TU, etc). These
-        instances may receive a parameter N, specifying the Nth
-        weekday, which could be positive or negative (like MO(+1)
-        or MO(-2). Not specifying it is the same as specifying
-        +1. You can also use an integer, where 0=MO. Notice that
-        if the calculated date is already Monday, for example,
-        using MO(1) or MO(-1) won't change the day.
+        One of the weekday instances (MO, TU, etc) available in the
+        relativedelta module. These instances may receive a parameter N,
+        specifying the Nth weekday, which could be positive or negative
+        (like MO(+1) or MO(-2)). Not specifying it is the same as specifying
+        +1. You can also use an integer, where 0=MO. This argument is always
+        relative e.g. if the calculated date is already Monday, using MO(1)
+        or MO(-1) won't change the day. To effectively make it absolute, use
+        it in combination with the day argument (e.g. day=1, MO(1) for first
+        Monday of the month).

     leapdays:
         Will add given days to the date found, if year is a leap
@@ -82,9 +88,12 @@ class relativedelta(object):

     For example

+        >>> from datetime import datetime
+        >>> from dateutil.relativedelta import relativedelta, MO
         >>> dt = datetime(2018, 4, 9, 13, 37, 0)
         >>> delta = relativedelta(hours=25, day=1, weekday=MO(1))
-        datetime(2018, 4, 2, 14, 37, 0)
+        >>> dt + delta
+        datetime.datetime(2018, 4, 2, 14, 37)

     First, the day is set to 1 (the first of the month), then 25 hours
     are added, to get to the 2nd day and 14th hour, finally the
@@ -276,7 +285,7 @@ class relativedelta(object):
         values for the relative attributes.

         >>> relativedelta(days=1.5, hours=2).normalized()
-        relativedelta(days=1, hours=14)
+        relativedelta(days=+1, hours=+14)

         :return:
             Returns a :class:`dateutil.relativedelta.relativedelta` object.
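
Note (not part of the diff): a short sketch of the relative-vs-absolute weekday behaviour described in the reworded docstring; dates are chosen for illustration:

    from datetime import datetime
    from dateutil.relativedelta import relativedelta, MO

    dt = datetime(2024, 5, 15)                # a Wednesday
    dt + relativedelta(weekday=MO(1))         # relative: next Monday, 2024-05-20
    dt + relativedelta(day=1, weekday=MO(1))  # "absolute": first Monday of the month, 2024-05-06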

View file

@@ -5,27 +5,27 @@ the recurrence rules documented in the
 `iCalendar RFC <https://tools.ietf.org/html/rfc5545>`_,
 including support for caching of results.
 """
-import itertools
-import datetime
 import calendar
+import datetime
+import heapq
+import itertools
 import re
 import sys
+from functools import wraps
+
+# For warning about deprecation of until and count
+from warnings import warn
+
+from six import advance_iterator, integer_types
+from six.moves import _thread, range
+
+from ._common import weekday as weekdaybase

 try:
     from math import gcd
 except ImportError:
     from fractions import gcd

-from six import advance_iterator, integer_types
-from six.moves import _thread, range
-
-import heapq
-
-from ._common import weekday as weekdaybase
-from .tz import tzutc, tzlocal
-
-# For warning about deprecation of until and count
-from warnings import warn
-
 __all__ = ["rrule", "rruleset", "rrulestr",
            "YEARLY", "MONTHLY", "WEEKLY", "DAILY",
            "HOURLY", "MINUTELY", "SECONDLY",
@@ -82,6 +82,7 @@ def _invalidates_cache(f):
     Decorator for rruleset methods which may invalidate the
     cached length.
     """
+    @wraps(f)
     def inner_func(self, *args, **kwargs):
         rv = f(self, *args, **kwargs)
         self._invalidate_cache()
@@ -178,7 +179,7 @@ class rrulebase(object):
                 return False
         return False

-    # __len__() introduces a large performance penality.
+    # __len__() introduces a large performance penalty.
     def count(self):
         """ Returns the number of recurrences in this set. It will have go
         trough the whole recurrence, if this hasn't been done before. """
@@ -353,20 +354,26 @@ class rrule(rrulebase):
        from calendar.firstweekday(), and may be modified by
        calendar.setfirstweekday().
    :param count:
-        How many occurrences will be generated.
+        If given, this determines how many occurrences will be generated.

        .. note::
-            As of version 2.5.0, the use of the ``until`` keyword together
-            with the ``count`` keyword is deprecated per RFC-5545 Sec. 3.3.10.
+            As of version 2.5.0, the use of the keyword ``until`` in conjunction
+            with ``count`` is deprecated, to make sure ``dateutil`` is fully
+            compliant with `RFC-5545 Sec. 3.3.10 <https://tools.ietf.org/
+            html/rfc5545#section-3.3.10>`_. Therefore, ``until`` and ``count``
+            **must not** occur in the same call to ``rrule``.
    :param until:
-        If given, this must be a datetime instance, that will specify the
+        If given, this must be a datetime instance specifying the upper-bound
        limit of the recurrence. The last recurrence in the rule is the greatest
        datetime that is less than or equal to the value specified in the
        ``until`` parameter.

        .. note::
-            As of version 2.5.0, the use of the ``until`` keyword together
-            with the ``count`` keyword is deprecated per RFC-5545 Sec. 3.3.10.
+            As of version 2.5.0, the use of the keyword ``until`` in conjunction
+            with ``count`` is deprecated, to make sure ``dateutil`` is fully
+            compliant with `RFC-5545 Sec. 3.3.10 <https://tools.ietf.org/
+            html/rfc5545#section-3.3.10>`_. Therefore, ``until`` and ``count``
+            **must not** occur in the same call to ``rrule``.
    :param bysetpos:
        If given, it must be either an integer, or a sequence of integers,
        positive or negative. Each given integer will specify an occurrence
@@ -1406,7 +1413,52 @@ class rruleset(rrulebase):

         self._len = total


 class _rrulestr(object):
+    """ Parses a string representation of a recurrence rule or set of
+    recurrence rules.
+
+    :param s:
+        Required, a string defining one or more recurrence rules.
+
+    :param dtstart:
+        If given, used as the default recurrence start if not specified in the
+        rule string.
+
+    :param cache:
+        If set ``True`` caching of results will be enabled, improving
+        performance of multiple queries considerably.
+
+    :param unfold:
+        If set ``True`` indicates that a rule string is split over more
+        than one line and should be joined before processing.
+
+    :param forceset:
+        If set ``True`` forces a :class:`dateutil.rrule.rruleset` to
+        be returned.
+
+    :param compatible:
+        If set ``True`` forces ``unfold`` and ``forceset`` to be ``True``.
+
+    :param ignoretz:
+        If set ``True``, time zones in parsed strings are ignored and a naive
+        :class:`datetime.datetime` object is returned.
+
+    :param tzids:
+        If given, a callable or mapping used to retrieve a
+        :class:`datetime.tzinfo` from a string representation.
+        Defaults to :func:`dateutil.tz.gettz`.
+
+    :param tzinfos:
+        Additional time zone names / aliases which may be present in a string
+        representation. See :func:`dateutil.parser.parse` for more
+        information.
+
+    :return:
+        Returns a :class:`dateutil.rrule.rruleset` or
+        :class:`dateutil.rrule.rrule`
+    """

     _freq_map = {"YEARLY": YEARLY,
                  "MONTHLY": MONTHLY,
@@ -1508,6 +1560,58 @@ class _rrulestr(object):
             raise ValueError("invalid '%s': %s" % (name, value))
         return rrule(dtstart=dtstart, cache=cache, **rrkwargs)

+    def _parse_date_value(self, date_value, parms, rule_tzids,
+                          ignoretz, tzids, tzinfos):
+        global parser
+        if not parser:
+            from dateutil import parser
+        datevals = []
+        value_found = False
+        TZID = None
+
+        for parm in parms:
+            if parm.startswith("TZID="):
+                try:
+                    tzkey = rule_tzids[parm.split('TZID=')[-1]]
+                except KeyError:
+                    continue
+                if tzids is None:
+                    from . import tz
+                    tzlookup = tz.gettz
+                elif callable(tzids):
+                    tzlookup = tzids
+                else:
+                    tzlookup = getattr(tzids, 'get', None)
+                    if tzlookup is None:
+                        msg = ('tzids must be a callable, mapping, or None, '
+                               'not %s' % tzids)
+                        raise ValueError(msg)
+
+                TZID = tzlookup(tzkey)
+                continue
+
+            # RFC 5445 3.8.2.4: The VALUE parameter is optional, but may be found
+            # only once.
+            if parm not in {"VALUE=DATE-TIME", "VALUE=DATE"}:
+                raise ValueError("unsupported parm: " + parm)
+            else:
+                if value_found:
+                    msg = ("Duplicate value parameter found in: " + parm)
+                    raise ValueError(msg)
+                value_found = True
+
+        for datestr in date_value.split(','):
+            date = parser.parse(datestr, ignoretz=ignoretz, tzinfos=tzinfos)
+            if TZID is not None:
+                if date.tzinfo is None:
+                    date = date.replace(tzinfo=TZID)
+                else:
+                    raise ValueError('DTSTART/EXDATE specifies multiple timezone')
+            datevals.append(date)
+
+        return datevals
+
     def _parse_rfc(self, s,
                    dtstart=None,
                    cache=False,
@@ -1580,54 +1684,18 @@ class _rrulestr(object):
                         raise ValueError("unsupported EXRULE parm: "+parm)
                     exrulevals.append(value)
                 elif name == "EXDATE":
-                    for parm in parms:
-                        if parm != "VALUE=DATE-TIME":
-                            raise ValueError("unsupported EXDATE parm: "+parm)
-                    exdatevals.append(value)
+                    exdatevals.extend(
+                        self._parse_date_value(value, parms,
+                                               TZID_NAMES, ignoretz,
+                                               tzids, tzinfos)
+                    )
                 elif name == "DTSTART":
-                    # RFC 5445 3.8.2.4: The VALUE parameter is optional, but
-                    # may be found only once.
-                    value_found = False
-                    TZID = None
-                    valid_values = {"VALUE=DATE-TIME", "VALUE=DATE"}
-                    for parm in parms:
-                        if parm.startswith("TZID="):
-                            try:
-                                tzkey = TZID_NAMES[parm.split('TZID=')[-1]]
-                            except KeyError:
-                                continue
-                            if tzids is None:
-                                from . import tz
-                                tzlookup = tz.gettz
-                            elif callable(tzids):
-                                tzlookup = tzids
-                            else:
-                                tzlookup = getattr(tzids, 'get', None)
-                                if tzlookup is None:
-                                    msg = ('tzids must be a callable, ' +
-                                           'mapping, or None, ' +
-                                           'not %s' % tzids)
-                                    raise ValueError(msg)
-
-                            TZID = tzlookup(tzkey)
-                            continue
-                        if parm not in valid_values:
-                            raise ValueError("unsupported DTSTART parm: "+parm)
-                        else:
-                            if value_found:
-                                msg = ("Duplicate value parameter found in " +
-                                       "DTSTART: " + parm)
-                                raise ValueError(msg)
-                            value_found = True
-                    if not parser:
-                        from dateutil import parser
-                    dtstart = parser.parse(value, ignoretz=ignoretz,
-                                           tzinfos=tzinfos)
-                    if TZID is not None:
-                        if dtstart.tzinfo is None:
-                            dtstart = dtstart.replace(tzinfo=TZID)
-                        else:
-                            raise ValueError('DTSTART specifies multiple timezones')
+                    dtvals = self._parse_date_value(value, parms, TZID_NAMES,
+                                                    ignoretz, tzids, tzinfos)
+                    if len(dtvals) != 1:
+                        raise ValueError("Multiple DTSTART values specified:" +
+                                         value)
+                    dtstart = dtvals[0]
                 else:
                     raise ValueError("unsupported property: "+name)
             if (forceset or len(rrulevals) > 1 or rdatevals
@@ -1649,10 +1717,7 @@ class _rrulestr(object):
                                           ignoretz=ignoretz,
                                           tzinfos=tzinfos))
             for value in exdatevals:
-                for datestr in value.split(','):
-                    rset.exdate(parser.parse(datestr,
-                                             ignoretz=ignoretz,
-                                             tzinfos=tzinfos))
+                rset.exdate(value)
             if compatible and dtstart:
                 rset.rdate(dtstart)
             return rset
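
Note (not part of the diff): a minimal sketch of the ``rrulestr`` entry point that the new ``_rrulestr`` docstring documents:

    from dateutil.rrule import rrulestr

    rule = rrulestr("DTSTART:19970902T090000\nRRULE:FREQ=DAILY;COUNT=3")
    list(rule)
    # [datetime.datetime(1997, 9, 2, 9, 0),
    #  datetime.datetime(1997, 9, 3, 9, 0),
    #  datetime.datetime(1997, 9, 4, 9, 0)]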

View file

@@ -2,11 +2,6 @@
 from .tz import *
 from .tz import __doc__

-#: Convenience constant providing a :class:`tzutc()` instance
-#:
-#: .. versionadded:: 2.7.0
-UTC = tzutc()
-
 __all__ = ["tzutc", "tzoffset", "tzlocal", "tzfile", "tzrange",
            "tzstr", "tzical", "tzwin", "tzwinlocal", "gettz",
            "enfold", "datetime_ambiguous", "datetime_exists",

View file

@@ -1,4 +1,4 @@
-from six import PY3
+from six import PY2

 from functools import wraps
@@ -16,14 +16,18 @@ def tzname_in_python2(namefunc):
     tzname() API changed in Python 3. It used to return bytes, but was changed
     to unicode strings
     """
-    def adjust_encoding(*args, **kwargs):
-        name = namefunc(*args, **kwargs)
-        if name is not None and not PY3:
-            name = name.encode()
+    if PY2:
+        @wraps(namefunc)
+        def adjust_encoding(*args, **kwargs):
+            name = namefunc(*args, **kwargs)
+            if name is not None:
+                name = name.encode()

-        return name
+            return name

-    return adjust_encoding
+        return adjust_encoding
+    else:
+        return namefunc


 # The following is adapted from Alexander Belopolsky's tz library
@@ -208,7 +212,7 @@ class _tzinfo(tzinfo):
         Since this is the one time that we *know* we have an unambiguous
         datetime object, we take this opportunity to determine whether the
         datetime is ambiguous and in a "fold" state (e.g. if it's the first
-        occurence, chronologically, of the ambiguous datetime).
+        occurrence, chronologically, of the ambiguous datetime).

         :param dt:
             A timezone-aware :class:`datetime.datetime` object.
@@ -246,7 +250,7 @@ class _tzinfo(tzinfo):
         Since this is the one time that we *know* we have an unambiguous
         datetime object, we take this opportunity to determine whether the
         datetime is ambiguous and in a "fold" state (e.g. if it's the first
-        occurance, chronologically, of the ambiguous datetime).
+        occurrence, chronologically, of the ambiguous datetime).

         :param dt:
             A timezone-aware :class:`datetime.datetime` object.

View file

@@ -1,4 +1,8 @@
 from datetime import timedelta
+import weakref
+from collections import OrderedDict
+
+from six.moves import _thread


 class _TzSingleton(type):
@@ -11,6 +15,7 @@ class _TzSingleton(type):
             cls.__instance = super(_TzSingleton, cls).__call__()
         return cls.__instance

+
 class _TzFactory(type):
     def instance(cls, *args, **kwargs):
         """Alternate constructor that returns a fresh instance"""
@@ -19,7 +24,11 @@ class _TzFactory(type):

 class _TzOffsetFactory(_TzFactory):
     def __init__(cls, *args, **kwargs):
-        cls.__instances = {}
+        cls.__instances = weakref.WeakValueDictionary()
+        cls.__strong_cache = OrderedDict()
+        cls.__strong_cache_size = 8
+
+        cls._cache_lock = _thread.allocate_lock()

     def __call__(cls, name, offset):
         if isinstance(offset, timedelta):
@@ -31,12 +40,25 @@ class _TzOffsetFactory(_TzFactory):
         if instance is None:
             instance = cls.__instances.setdefault(key,
                                                   cls.instance(name, offset))
+
+        # This lock may not be necessary in Python 3. See GH issue #901
+        with cls._cache_lock:
+            cls.__strong_cache[key] = cls.__strong_cache.pop(key, instance)
+
+            # Remove an item if the strong cache is overpopulated
+            if len(cls.__strong_cache) > cls.__strong_cache_size:
+                cls.__strong_cache.popitem(last=False)
+
         return instance


 class _TzStrFactory(_TzFactory):
     def __init__(cls, *args, **kwargs):
-        cls.__instances = {}
+        cls.__instances = weakref.WeakValueDictionary()
+        cls.__strong_cache = OrderedDict()
+        cls.__strong_cache_size = 8
+
+        cls.__cache_lock = _thread.allocate_lock()

     def __call__(cls, s, posix_offset=False):
         key = (s, posix_offset)
@@ -45,5 +67,14 @@ class _TzStrFactory(_TzFactory):
         if instance is None:
             instance = cls.__instances.setdefault(key,
                                                   cls.instance(s, posix_offset))
+
+        # This lock may not be necessary in Python 3. See GH issue #901
+        with cls.__cache_lock:
+            cls.__strong_cache[key] = cls.__strong_cache.pop(key, instance)
+
+            # Remove an item if the strong cache is overpopulated
+            if len(cls.__strong_cache) > cls.__strong_cache_size:
+                cls.__strong_cache.popitem(last=False)
+
         return instance

View file

@@ -13,6 +13,8 @@ import time
 import sys
 import os
 import bisect
+import weakref
+from collections import OrderedDict

 import six
 from six import string_types
@@ -28,6 +30,9 @@ try:
 except ImportError:
     tzwin = tzwinlocal = None

+# For warning about rounding tzinfo
+from warnings import warn
+
 ZERO = datetime.timedelta(0)
 EPOCH = datetime.datetime.utcfromtimestamp(0)
 EPOCHORDINAL = EPOCH.toordinal()
@@ -118,6 +123,12 @@ class tzutc(datetime.tzinfo):
     __reduce__ = object.__reduce__


+#: Convenience constant providing a :class:`tzutc()` instance
+#:
+#: .. versionadded:: 2.7.0
+UTC = tzutc()
+
+
 @six.add_metaclass(_TzOffsetFactory)
 class tzoffset(datetime.tzinfo):
     """
@@ -137,7 +148,8 @@ class tzoffset(datetime.tzinfo):
             offset = offset.total_seconds()
         except (TypeError, AttributeError):
             pass
-        self._offset = datetime.timedelta(seconds=offset)
+
+        self._offset = datetime.timedelta(seconds=_get_supported_offset(offset))

     def utcoffset(self, dt):
         return self._offset
@@ -373,7 +385,7 @@ class _tzfile(object):

 class tzfile(_tzinfo):
     """
-    This is a ``tzinfo`` subclass thant allows one to use the ``tzfile(5)``
+    This is a ``tzinfo`` subclass that allows one to use the ``tzfile(5)``
     format timezone files to extract current and historical zone information.

     :param fileobj:
@@ -460,7 +472,7 @@ class tzfile(_tzinfo):

         if fileobj is not None:
             if not file_opened_here:
-                fileobj = _ContextWrapper(fileobj)
+                fileobj = _nullcontext(fileobj)

             with fileobj as file_stream:
                 tzobj = self._read_tzfile(file_stream)
@@ -600,10 +612,7 @@ class tzfile(_tzinfo):
             out.ttinfo_list = []
             for i in range(typecnt):
                 gmtoff, isdst, abbrind = ttinfo[i]
-                # Round to full-minutes if that's not the case. Python's
-                # datetime doesn't accept sub-minute timezones. Check
-                # http://python.org/sf/1447945 for some information.
-                gmtoff = 60 * ((gmtoff + 30) // 60)
+                gmtoff = _get_supported_offset(gmtoff)
                 tti = _ttinfo()
                 tti.offset = gmtoff
                 tti.dstoffset = datetime.timedelta(0)
@@ -655,37 +664,44 @@ class tzfile(_tzinfo):
             # isgmt are off, so it should be in wall time. OTOH, it's
             # always in gmt time. Let me know if you have comments
             # about this.
-        laststdoffset = None
+        lastdst = None
+        lastoffset = None
+        lastdstoffset = None
+        lastbaseoffset = None
         out.trans_list = []
+
         for i, tti in enumerate(out.trans_idx):
-            if not tti.isdst:
-                offset = tti.offset
-                laststdoffset = offset
-            else:
-                if laststdoffset is not None:
-                    # Store the DST offset as well and update it in the list
-                    tti.dstoffset = tti.offset - laststdoffset
-                    out.trans_idx[i] = tti
-
-                offset = laststdoffset or 0
-
-            out.trans_list.append(out.trans_list_utc[i] + offset)
-
-        # In case we missed any DST offsets on the way in for some reason, make
-        # a second pass over the list, looking for the /next/ DST offset.
-        laststdoffset = None
-        for i in reversed(range(len(out.trans_idx))):
-            tti = out.trans_idx[i]
-            if tti.isdst:
-                if not (tti.dstoffset or laststdoffset is None):
-                    tti.dstoffset = tti.offset - laststdoffset
-            else:
-                laststdoffset = tti.offset
-
-            if not isinstance(tti.dstoffset, datetime.timedelta):
-                tti.dstoffset = datetime.timedelta(seconds=tti.dstoffset)
-
-            out.trans_idx[i] = tti
+            offset = tti.offset
+            dstoffset = 0
+
+            if lastdst is not None:
+                if tti.isdst:
+                    if not lastdst:
+                        dstoffset = offset - lastoffset
+
+                    if not dstoffset and lastdstoffset:
+                        dstoffset = lastdstoffset
+
+                    tti.dstoffset = datetime.timedelta(seconds=dstoffset)
+                    lastdstoffset = dstoffset
+
+            # If a time zone changes its base offset during a DST transition,
+            # then you need to adjust by the previous base offset to get the
+            # transition time in local time. Otherwise you use the current
+            # base offset. Ideally, I would have some mathematical proof of
+            # why this is true, but I haven't really thought about it enough.
+            baseoffset = offset - dstoffset
+            adjustment = baseoffset
+            if (lastbaseoffset is not None and baseoffset != lastbaseoffset
+                    and tti.isdst != lastdst):
+                # The base DST has changed
+                adjustment = lastbaseoffset
+
+            lastdst = tti.isdst
+            lastoffset = offset
+            lastbaseoffset = baseoffset
+
+            out.trans_list.append(out.trans_list_utc[i] + adjustment)

         out.trans_idx = tuple(out.trans_idx)
         out.trans_list = tuple(out.trans_list)
@@ -1255,7 +1271,7 @@ class tzical(object):
                 fileobj = open(fileobj, 'r')
         else:
             self._s = getattr(fileobj, 'name', repr(fileobj))
-            fileobj = _ContextWrapper(fileobj)
+            fileobj = _nullcontext(fileobj)

         self._vtz = {}
@@ -1528,7 +1544,9 @@ def __get_gettz():
        """
        def __init__(self):
-            self.__instances = {}
+            self.__instances = weakref.WeakValueDictionary()
+            self.__strong_cache_size = 8
+            self.__strong_cache = OrderedDict()
             self._cache_lock = _thread.allocate_lock()

        def __call__(self, name=None):
@@ -1537,17 +1555,37 @@ def __get_gettz():
             if rv is None:
                 rv = self.nocache(name=name)
-                if not (name is None or isinstance(rv, tzlocal_classes)):
+                if not (name is None
+                        or isinstance(rv, tzlocal_classes)
+                        or rv is None):
                     # tzlocal is slightly more complicated than the other
                     # time zone providers because it depends on environment
                     # at construction time, so don't cache that.
+                    #
+                    # We also cannot store weak references to None, so we
+                    # will also not store that.
                     self.__instances[name] = rv
+                else:
+                    # No need for strong caching, return immediately
+                    return rv
+
+            self.__strong_cache[name] = self.__strong_cache.pop(name, rv)
+
+            if len(self.__strong_cache) > self.__strong_cache_size:
+                self.__strong_cache.popitem(last=False)

             return rv

+        def set_cache_size(self, size):
+            with self._cache_lock:
+                self.__strong_cache_size = size
+                while len(self.__strong_cache) > size:
+                    self.__strong_cache.popitem(last=False)
+
         def cache_clear(self):
             with self._cache_lock:
-                self.__instances = {}
+                self.__instances = weakref.WeakValueDictionary()
+                self.__strong_cache.clear()

         @staticmethod
         def nocache(name=None):
@@ -1558,7 +1596,7 @@ def __get_gettz():
                     name = os.environ["TZ"]
                 except KeyError:
                     pass

-            if name is None or name == ":":
+            if name is None or name in ("", ":"):
                 for filepath in TZFILES:
                     if not os.path.isabs(filepath):
                         filename = filepath
@@ -1577,8 +1615,15 @@ def __get_gettz():
                 else:
                     tz = tzlocal()
             else:
+                try:
                     if name.startswith(":"):
                         name = name[1:]
+                except TypeError as e:
+                    if isinstance(name, bytes):
+                        new_msg = "gettz argument should be str, not bytes"
+                        six.raise_from(TypeError(new_msg), e)
+                    else:
+                        raise
+
                 if os.path.isabs(name):
                     if os.path.isfile(name):
                         tz = tzfile(name)
@@ -1601,7 +1646,8 @@ def __get_gettz():
                     if tzwin is not None:
                         try:
                             tz = tzwin(name)
-                        except WindowsError:
+                        except (WindowsError, UnicodeEncodeError):
+                            # UnicodeEncodeError is for Python 2.7 compat
                             tz = None

                     if not tz:
@@ -1622,7 +1668,7 @@ def __get_gettz():
                             break
                 else:
                     if name in ("GMT", "UTC"):
-                        tz = tzutc()
+                        tz = UTC
                     elif name in time.tzname:
                         tz = tzlocal()
             return tz
@@ -1662,7 +1708,7 @@ def datetime_exists(dt, tz=None):
     # This is essentially a test of whether or not the datetime can survive
     # a round trip to UTC.
-    dt_rt = dt.replace(tzinfo=tz).astimezone(tzutc()).astimezone(tz)
+    dt_rt = dt.replace(tzinfo=tz).astimezone(UTC).astimezone(tz)
     dt_rt = dt_rt.replace(tzinfo=None)

     return dt == dt_rt
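
Note (not part of the diff): the round-trip test above is easiest to see with a DST gap; a small sketch, with zone and date chosen for illustration:

    from datetime import datetime
    from dateutil import tz

    eastern = tz.gettz("America/New_York")
    # 2:30 AM on 2020-03-08 was skipped by the spring-forward transition
    tz.datetime_exists(datetime(2020, 3, 8, 2, 30), eastern)  # False
    tz.datetime_exists(datetime(2020, 3, 8, 3, 30), eastern)  # True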
@@ -1768,7 +1814,25 @@ def _datetime_to_timestamp(dt):
     return (dt.replace(tzinfo=None) - EPOCH).total_seconds()


-class _ContextWrapper(object):
+if sys.version_info >= (3, 6):
+    def _get_supported_offset(second_offset):
+        return second_offset
+
+else:
+    def _get_supported_offset(second_offset):
+        # For python pre-3.6, round to full-minutes if that's not the case.
+        # Python's datetime doesn't accept sub-minute timezones. Check
+        # http://python.org/sf/1447945 or https://bugs.python.org/issue5288
+        # for some information.
+        old_offset = second_offset
+        calculated_offset = 60 * ((second_offset + 30) // 60)
+        return calculated_offset
+
+
+try:
+    # Python 3.7 feature
+    from contextlib import nullcontext as _nullcontext
+except ImportError:
+    class _nullcontext(object):
         """
         Class for wrapping contexts so that they are passed through in a
         with statement.
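
Note (not part of the diff): a sketch of the strong/weak ``gettz`` caching introduced above, using the knobs added in this release:

    from dateutil import tz

    nyc = tz.gettz("America/New_York")
    assert tz.gettz("America/New_York") is nyc  # served from the cache

    tz.gettz.set_cache_size(16)  # resize the strong cache (default is 8)
    tz.gettz.cache_clear()       # drop all cached instances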

View file

@@ -1,3 +1,11 @@
+# -*- coding: utf-8 -*-
+"""
+This module provides an interface to the native time zone data on Windows,
+including :py:class:`datetime.tzinfo` implementations.
+
+Attempting to import this module on a non-Windows platform will raise an
+:py:obj:`ImportError`.
+"""
 # This code was originally contributed by Jeffrey Harris.
 import datetime
 import struct
@@ -39,7 +47,7 @@ TZKEYNAME = _settzkeyname()

 class tzres(object):
     """
-    Class for accessing `tzres.dll`, which contains timezone name related
+    Class for accessing ``tzres.dll``, which contains timezone name related
     resources.

     .. versionadded:: 2.5.0
@@ -72,9 +80,10 @@ class tzres(object):
         :param offset:
             A positive integer value referring to a string from the tzres dll.

-        ..note:
+        .. note::
+
             Offsets found in the registry are generally of the form
-            `@tzres.dll,-114`. The offset in this case if 114, not -114.
+            ``@tzres.dll,-114``. The offset in this case is 114, not -114.

         """
         resource = self.p_wchar()
@@ -146,6 +155,9 @@ class tzwinbase(tzrangebase):
         return result

     def display(self):
+        """
+        Return the display name of the time zone.
+        """
         return self._display

     def transitions(self, year):
@@ -188,6 +200,17 @@ class tzwinbase(tzrangebase):

 class tzwin(tzwinbase):
+    """
+    Time zone object created from the zone info in the Windows registry
+
+    These are similar to :py:class:`dateutil.tz.tzrange` objects in that
+    the time zone data is provided in the format of a single offset rule
+    for either 0 or 2 time zone transitions per year.
+
+    :param: name
+        The name of a Windows time zone key, e.g. "Eastern Standard Time".
+        The full list of keys can be retrieved with :func:`tzwin.list`.
+    """

     def __init__(self, name):
         self._name = name
@@ -234,6 +257,22 @@ class tzwin(tzwinbase):

 class tzwinlocal(tzwinbase):
+    """
+    Class representing the local time zone information in the Windows registry
+
+    While :class:`dateutil.tz.tzlocal` makes system calls (via the :mod:`time`
+    module) to retrieve time zone information, ``tzwinlocal`` retrieves the
+    rules directly from the Windows registry and creates an object like
+    :class:`dateutil.tz.tzwin`.
+
+    Because Windows does not have an equivalent of :func:`time.tzset`, on
+    Windows, :class:`dateutil.tz.tzlocal` instances will always reflect the
+    time zone settings *at the time that the process was started*, meaning
+    changes to the machine's time zone settings during the run of a program
+    on Windows will **not** be reflected by :class:`dateutil.tz.tzlocal`.
+    Because ``tzwinlocal`` reads the registry directly, it is unaffected by
+    this issue.
+    """

     def __init__(self):
         with winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE) as handle:
             with winreg.OpenKey(handle, TZLOCALKEYNAME) as tzlocalkey:
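
Note (not part of the diff): a small usage sketch for the newly documented classes; Windows only, with the key name taken from the docstring above:

    from dateutil.tz import tzwin, tzwinlocal

    eastern = tzwin("Eastern Standard Time")  # any key from tzwin.list()
    local = tzwinlocal()                      # read from the registry, not time.tzset()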

View file

@@ -28,7 +28,7 @@ def today(tzinfo=None):

 def default_tzinfo(dt, tzinfo):
     """
-    Sets the the ``tzinfo`` parameter on naive datetimes only
+    Sets the ``tzinfo`` parameter on naive datetimes only

     This is useful for example when you are provided a datetime that may have
     either an implicit or explicit time zone, such as when parsing a time zone
@@ -63,7 +63,7 @@ def default_tzinfo(dt, tzinfo):

 def within_delta(dt1, dt2, delta):
     """
-    Useful for comparing two datetimes that may a negilible difference
+    Useful for comparing two datetimes that may have a negligible difference
     to be considered equal.
     """
     delta = abs(delta)

View file

@@ -3,7 +3,7 @@ import os
 import tempfile
 import shutil
 import json
-from subprocess import check_call
+from subprocess import check_call, check_output
 from tarfile import TarFile

 from dateutil.zoneinfo import METADATA_FN, ZONEFILENAME
@@ -23,11 +23,9 @@ def rebuild(filename, tag=None, format="gz", zonegroups=[], metadata=None):
         for name in zonegroups:
             tf.extract(name, tmpdir)
         filepaths = [os.path.join(tmpdir, n) for n in zonegroups]
-        try:
-            check_call(["zic", "-d", zonedir] + filepaths)
-        except OSError as e:
-            _print_on_nosuchfile(e)
-            raise
+
+        _run_zic(zonedir, filepaths)
+
         # write metadata file
         with open(os.path.join(zonedir, METADATA_FN), 'w') as f:
             json.dump(metadata, f, indent=4, sort_keys=True)
@@ -40,6 +38,30 @@ def rebuild(filename, tag=None, format="gz", zonegroups=[], metadata=None):
         shutil.rmtree(tmpdir)


+def _run_zic(zonedir, filepaths):
+    """Calls the ``zic`` compiler in a compatible way to get a "fat" binary.
+
+    Recent versions of ``zic`` default to ``-b slim``, while older versions
+    don't even have the ``-b`` option (but default to "fat" binaries). The
+    current version of dateutil does not support Version 2+ TZif files, which
+    causes problems when used in conjunction with "slim" binaries, so this
+    function is used to ensure that we always get a "fat" binary.
+    """
+    try:
+        help_text = check_output(["zic", "--help"])
+    except OSError as e:
+        _print_on_nosuchfile(e)
+        raise
+
+    if b"-b " in help_text:
+        bloat_args = ["-b", "fat"]
+    else:
+        bloat_args = []
+
+    check_call(["zic"] + bloat_args + ["-d", zonedir] + filepaths)
+
+
 def _print_on_nosuchfile(e):
     """Print helpful troubleshooting message

View file

@@ -142,7 +142,7 @@ def main(args=None):  # pylint:disable=too-many-branches
     if options.get('yaml'):
         try:
-            import yaml  # pylint:disable=unused-variable
+            import yaml  # pylint:disable=unused-variable,unused-import
         except ImportError:  # pragma: no cover
             del options['yaml']
             print('PyYAML is not installed. \'--yaml\' option will be ignored ...', file=sys.stderr)

View file

@@ -4,4 +4,4 @@
 Version module
 """
 # pragma: no cover
-__version__ = '3.0.3'
+__version__ = '3.1.1'

View file

@ -82,6 +82,19 @@ def properties(options=None):
return default_api.properties(options) return default_api.properties(options)
def suggested_expected(titles, options=None):
"""
Return a list of suggested titles to be used as `expected_title` based on the list of titles
:param titles: the filename or release name
:type titles: list|set|dict
:param options:
:type options: str|dict
:return:
:rtype: list of str
"""
return default_api.suggested_expected(titles, options)
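A hedged usage sketch of the new helper (titles invented; `options` takes the same str/dict forms as `guessit()` itself):

```python
from guessit import api

titles = ['This is Us', 'OSS 117']
expected = api.suggested_expected(titles)
# Any title whose guess is not exactly {title, type} is echoed back,
# ready to be passed as options={'expected_title': expected}.
```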
class GuessItApi(object): class GuessItApi(object):
""" """
An api class that can be configured with custom Rebulk configuration. An api class that can be configured with custom Rebulk configuration.
@ -228,5 +241,23 @@ class GuessItApi(object):
ordered = self.rebulk.customize_properties(ordered) ordered = self.rebulk.customize_properties(ordered)
return ordered return ordered
def suggested_expected(self, titles, options=None):
"""
Return a list of suggested titles to be used as `expected_title` based on the list of titles
:param titles: the filename or release name
:type titles: list|set|dict
:param options:
:type options: str|dict
:return:
:rtype: list of str
"""
suggested = []
for title in titles:
guess = self.guessit(title, options)
if len(guess) != 2 or 'title' not in guess:
suggested.append(title)
return suggested
default_api = GuessItApi() default_api = GuessItApi()

View file

@ -4,7 +4,7 @@
Backports Backports
""" """
# pragma: no-cover # pragma: no-cover
# pylint: disabled # pylint: skip-file
def cmp_to_key(mycmp): def cmp_to_key(mycmp):
"""functools.cmp_to_key backport""" """functools.cmp_to_key backport"""

View file

@ -1,18 +1,19 @@
{ {
"expected_title": [ "expected_title": [
"OSS 117" "OSS 117",
"This is Us"
], ],
"allowed_countries": [ "allowed_countries": [
"au", "au",
"us", "gb",
"gb" "us"
], ],
"allowed_languages": [ "allowed_languages": [
"ca",
"cs",
"de", "de",
"en", "en",
"es", "es",
"ca",
"cs",
"fr", "fr",
"he", "he",
"hi", "hi",
@ -20,7 +21,9 @@
"it", "it",
"ja", "ja",
"ko", "ko",
"mul",
"nl", "nl",
"no",
"pl", "pl",
"pt", "pt",
"ro", "ro",
@ -28,18 +31,50 @@
"sv", "sv",
"te", "te",
"uk", "uk",
"mul",
"und" "und"
], ],
"advanced_config": { "advanced_config": {
"common_words": [ "common_words": [
"ca",
"cat",
"de", "de",
"it" "he",
"it",
"no",
"por",
"rum",
"se",
"st",
"sub"
], ],
"groups": { "groups": {
"starting": "([{", "starting": "([{",
"ending": ")]}" "ending": ")]}"
}, },
"audio_codec": {
"audio_channels": {
"1.0": [
"1ch",
"mono"
],
"2.0": [
"2ch",
"stereo",
"re:(2[\\W_]0(?:ch)?)(?=[^\\d]|$)"
],
"5.1": [
"5ch",
"6ch",
"re:(5[\\W_][01](?:ch)?)(?=[^\\d]|$)",
"re:(6[\\W_]0(?:ch)?)(?=[^\\d]|$)"
],
"7.1": [
"7ch",
"8ch",
"re:(7[\\W_][01](?:ch)?)(?=[^\\d]|$)"
]
}
},
"container": { "container": {
"subtitles": [ "subtitles": [
"srt", "srt",
@ -59,9 +94,10 @@
"avi", "avi",
"divx", "divx",
"flv", "flv",
"mk3d", "iso",
"m4v", "m4v",
"mk2", "mk2",
"mk3d",
"mka", "mka",
"mkv", "mkv",
"mov", "mov",
@ -77,12 +113,11 @@
"ram", "ram",
"rm", "rm",
"ts", "ts",
"vob",
"wav", "wav",
"webm", "webm",
"wma", "wma",
"wmv", "wmv"
"iso",
"vob"
], ],
"torrent": [ "torrent": [
"torrent" "torrent"
@ -255,7 +290,6 @@
], ],
"subtitle_prefixes": [ "subtitle_prefixes": [
"st", "st",
"v",
"vost", "vost",
"subforced", "subforced",
"fansub", "fansub",
@ -297,12 +331,12 @@
}, },
"release_group": { "release_group": {
"forbidden_names": [ "forbidden_names": [
"rip", "bonus",
"by", "by",
"for", "for",
"par", "par",
"pour", "pour",
"bonus" "rip"
], ],
"ignored_seps": "[]{}()" "ignored_seps": "[]{}()"
}, },
@ -311,6 +345,7 @@
"23.976", "23.976",
"24", "24",
"25", "25",
"29.970",
"30", "30",
"48", "48",
"50", "50",
@ -329,6 +364,7 @@
"progressive": [ "progressive": [
"360", "360",
"480", "480",
"540",
"576", "576",
"900", "900",
"1080", "1080",
@ -342,8 +378,8 @@
"website": { "website": {
"safe_tlds": [ "safe_tlds": [
"com", "com",
"org", "net",
"net" "org"
], ],
"safe_subdomains": [ "safe_subdomains": [
"www" "www"
@ -351,12 +387,200 @@
"safe_prefixes": [ "safe_prefixes": [
"co", "co",
"com", "com",
"org", "net",
"net" "org"
], ],
"prefixes": [ "prefixes": [
"from" "from"
] ]
},
"streaming_service": {
"A&E": [
"AE",
"A&E"
],
"ABC": "AMBC",
"ABC Australia": "AUBC",
"Al Jazeera English": "AJAZ",
"AMC": "AMC",
"Amazon Prime": [
"AMZN",
"Amazon",
"re:Amazon-?Prime"
],
"Adult Swim": [
"AS",
"re:Adult-?Swim"
],
"America's Test Kitchen": "ATK",
"Animal Planet": "ANPL",
"AnimeLab": "ANLB",
"AOL": "AOL",
"ARD": "ARD",
"BBC iPlayer": [
"iP",
"re:BBC-?iPlayer"
],
"BravoTV": "BRAV",
"Canal+": "CNLP",
"Cartoon Network": "CN",
"CBC": "CBC",
"CBS": "CBS",
"CNBC": "CNBC",
"Comedy Central": [
"CC",
"re:Comedy-?Central"
],
"Channel 4": "4OD",
"CHRGD": "CHGD",
"Cinemax": "CMAX",
"Country Music Television": "CMT",
"Comedians in Cars Getting Coffee": "CCGC",
"Crunchy Roll": [
"CR",
"re:Crunchy-?Roll"
],
"Crackle": "CRKL",
"CSpan": "CSPN",
"CTV": "CTV",
"CuriosityStream": "CUR",
"CWSeed": "CWS",
"Daisuki": "DSKI",
"DC Universe": "DCU",
"Deadhouse Films": "DHF",
"DramaFever": [
"DF",
"DramaFever"
],
"Digiturk Diledigin Yerde": "DDY",
"Discovery": [
"DISC",
"Discovery"
],
"Disney": [
"DSNY",
"Disney"
],
"DIY Network": "DIY",
"Doc Club": "DOCC",
"DPlay": "DPLY",
"E!": "ETV",
"ePix": "EPIX",
"El Trece": "ETTV",
"ESPN": "ESPN",
"Esquire": "ESQ",
"Family": "FAM",
"Family Jr": "FJR",
"Food Network": "FOOD",
"Fox": "FOX",
"Freeform": "FREE",
"FYI Network": "FYI",
"Global": "GLBL",
"GloboSat Play": "GLOB",
"Hallmark": "HLMK",
"HBO Go": [
"HBO",
"re:HBO-?Go"
],
"HGTV": "HGTV",
"History": [
"HIST",
"History"
],
"Hulu": "HULU",
"Investigation Discovery": "ID",
"IFC": "IFC",
"iTunes": "iTunes",
"ITV": "ITV",
"Knowledge Network": "KNOW",
"Lifetime": "LIFE",
"Motor Trend OnDemand": "MTOD",
"MBC": [
"MBC",
"MBCVOD"
],
"MSNBC": "MNBC",
"MTV": "MTV",
"National Geographic": [
"NATG",
"re:National-?Geographic"
],
"NBA TV": [
"NBA",
"re:NBA-?TV"
],
"NBC": "NBC",
"Netflix": [
"NF",
"Netflix"
],
"NFL": "NFL",
"NFL Now": "NFLN",
"NHL GameCenter": "GC",
"Nickelodeon": [
"NICK",
"Nickelodeon"
],
"Norsk Rikskringkasting": "NRK",
"OnDemandKorea": [
"ODK",
"OnDemandKorea"
],
"PBS": "PBS",
"PBS Kids": "PBSK",
"Playstation Network": "PSN",
"Pluzz": "PLUZ",
"RTE One": "RTE",
"SBS (AU)": "SBS",
"SeeSo": [
"SESO",
"SeeSo"
],
"Shomi": "SHMI",
"Spike": "SPIK",
"Spike TV": [
"SPKE",
"re:Spike-?TV"
],
"Sportsnet": "SNET",
"Sprout": "SPRT",
"Stan": "STAN",
"Starz": "STZ",
"Sveriges Television": "SVT",
"SwearNet": "SWER",
"Syfy": "SYFY",
"TBS": "TBS",
"TFou": "TFOU",
"The CW": [
"CW",
"re:The-?CW"
],
"TLC": "TLC",
"TubiTV": "TUBI",
"TV3 Ireland": "TV3",
"TV4 Sweeden": "TV4",
"TVING": "TVING",
"TV Land": [
"TVL",
"re:TV-?Land"
],
"UFC": "UFC",
"UKTV": "UKTV",
"Univision": "UNIV",
"USA Network": "USAN",
"Velocity": "VLCT",
"VH1": "VH1",
"Viceland": "VICE",
"Viki": "VIKI",
"Vimeo": "VMEO",
"VRV": "VRV",
"W Network": "WNET",
"WatchMe": "WME",
"WWE Network": "WWEN",
"Xbox Video": "XBOX",
"Yahoo": "YHOO",
"YouTube Red": "RED",
"ZDF": "ZDF"
} }
} }
} }
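The new `audio_channels` block introduces a small convention consumed by the Python side (see the `audio_codec` changes further down): plain entries are literal strings, while a `re:` prefix marks a regular expression (JSON doubles the backslashes). A self-contained sketch with an invented matching helper:

```python
import re

audio_channels = {
    "2.0": ["2ch", "stereo", r"re:(2[\W_]0(?:ch)?)(?=[^\d]|$)"],
}

def match_channels(token):
    """Invented helper mirroring how the config entries are interpreted."""
    for value, items in audio_channels.items():
        for item in items:
            if item.startswith("re:"):                       # regex entry
                if re.search(item[3:], token, re.IGNORECASE):
                    return value
            elif token.lower() == item.lower():              # literal entry
                return value
    return None

assert match_channels("stereo") == "2.0"
assert match_channels("2_0") == "2.0"
```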

View file

@ -128,7 +128,7 @@ class ConfigurationException(Exception):
""" """
Exception related to configuration file. Exception related to configuration file.
""" """
pass pass # pylint:disable=unnecessary-pass
def load_config(options): def load_config(options):
@ -153,7 +153,7 @@ def load_config(options):
cwd = os.getcwd() cwd = os.getcwd()
yaml_supported = False yaml_supported = False
try: try:
import yaml # pylint: disable=unused-variable import yaml # pylint:disable=unused-variable,unused-import
yaml_supported = True yaml_supported = True
except ImportError: except ImportError:
pass pass
@ -252,7 +252,7 @@ def load_config_file(filepath):
try: try:
import yaml import yaml
with open(filepath) as config_file_data: with open(filepath) as config_file_data:
return yaml.load(config_file_data) return yaml.load(config_file_data, yaml.SafeLoader)
except ImportError: # pragma: no cover except ImportError: # pragma: no cover
raise ConfigurationException('Configuration file extension is not supported. ' raise ConfigurationException('Configuration file extension is not supported. '
'PyYAML should be installed to support "%s" file' % ( 'PyYAML should be installed to support "%s" file' % (
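The explicit loader is the substance of this hunk: a bare `yaml.load` can construct arbitrary Python objects from tagged input, while `SafeLoader` restricts parsing to plain data types. A quick check:

```python
import yaml

data = yaml.load("expected_title: [OSS 117]", yaml.SafeLoader)
assert data == {"expected_title": ["OSS 117"]}
```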

View file

@ -25,7 +25,7 @@ def _potential_before(i, input_string):
:return: :return:
:rtype: bool :rtype: bool
""" """
return i - 2 >= 0 and input_string[i] in seps and input_string[i - 2] in seps and input_string[i - 1] not in seps return i - 1 >= 0 and input_string[i] in seps and input_string[i - 2] in seps and input_string[i - 1] not in seps
def _potential_after(i, input_string): def _potential_after(i, input_string):

View file

@ -28,7 +28,7 @@ def int_coercable(string):
return False return False
def compose(*validators): def and_(*validators):
""" """
Compose validators functions Compose validators functions
:param validators: :param validators:
@ -49,3 +49,26 @@ def compose(*validators):
return False return False
return True return True
return composed return composed
def or_(*validators):
"""
Compose validators functions
:param validators:
:type validators:
:return:
:rtype:
"""
def composed(string):
"""
Composed validators function
:param string:
:type string:
:return:
:rtype:
"""
for validator in validators:
if validator(string):
return True
return False
return composed
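A short sketch of how the renamed `and_` and the new `or_` combinators behave (the validator functions below are invented):

```python
from guessit.rules.common.validators import and_, or_

def non_empty(string):
    return bool(string)

def short(string):
    return len(string) <= 5

assert and_(non_empty, short)("abc")             # every validator passes
assert not and_(non_empty, short)("a bit long")  # short() fails
assert or_(non_empty, short)("")                 # short('') alone suffices
```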

View file

@ -0,0 +1,20 @@
"""
Match processors
"""
from guessit.rules.common import seps
def strip(match, chars=seps):
"""
Strip given characters from match.
:param chars:
:param match:
:return:
"""
while match.input_string[match.start] in chars:
match.start += 1
while match.input_string[match.end - 1] in chars:
match.end -= 1
if not match:
return False
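A plain-string analogue of the new processor (rebulk's `Match` carries `input_string`, `start` and `end`; the helper below is a stand-in, not rebulk's API):

```python
SEPS = " ._-"  # abbreviated stand-in for guessit.rules.common.seps

def strip_span(s, start, end, chars=SEPS):
    """Shrink [start, end) until it no longer begins or ends with a separator."""
    while s[start] in chars:
        start += 1
    while s[end - 1] in chars:
        end -= 1
    return start, end

assert strip_span(".12.of.24.", 0, 10) == (1, 9)
```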

View file

@ -34,7 +34,9 @@ class EnlargeGroupMatches(CustomRule):
for match in matches.ending(group.end - 1): for match in matches.ending(group.end - 1):
ending.append(match) ending.append(match)
if starting or ending:
return starting, ending return starting, ending
return False
def then(self, matches, when_response, context): def then(self, matches, when_response, context):
starting, ending = when_response starting, ending = when_response
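This `if ...: return ... / return False` shape recurs throughout the commit, and the reason is easy to miss: rebulk only skips a rule's `then()` when `when()` returns something falsy, and a tuple of empty lists is still truthy:

```python
payload = ([], [])
assert bool(payload)    # non-empty tuple: the rule would still fire
assert not bool(False)  # False cleanly signals "nothing to do"
```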

View file

@ -3,9 +3,8 @@
""" """
audio_codec, audio_profile and audio_channels property audio_codec, audio_profile and audio_channels property
""" """
from rebulk.remodule import re
from rebulk import Rebulk, Rule, RemoveMatch from rebulk import Rebulk, Rule, RemoveMatch
from rebulk.remodule import re
from ..common import dash from ..common import dash
from ..common.pattern import is_disabled from ..common.pattern import is_disabled
@ -23,7 +22,9 @@ def audio_codec(config): # pylint:disable=unused-argument
:return: Created Rebulk object :return: Created Rebulk object
:rtype: Rebulk :rtype: Rebulk
""" """
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True) rebulk = Rebulk()\
.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])\
.string_defaults(ignore_case=True)
def audio_codec_priority(match1, match2): def audio_codec_priority(match1, match2):
""" """
@ -61,7 +62,9 @@ def audio_codec(config): # pylint:disable=unused-argument
rebulk.string('PCM', value='PCM') rebulk.string('PCM', value='PCM')
rebulk.string('LPCM', value='LPCM') rebulk.string('LPCM', value='LPCM')
rebulk.defaults(name='audio_profile', disabled=lambda context: is_disabled(context, 'audio_profile')) rebulk.defaults(clear=True,
name='audio_profile',
disabled=lambda context: is_disabled(context, 'audio_profile'))
rebulk.string('MA', value='Master Audio', tags=['audio_profile.rule', 'DTS-HD']) rebulk.string('MA', value='Master Audio', tags=['audio_profile.rule', 'DTS-HD'])
rebulk.string('HR', 'HRA', value='High Resolution Audio', tags=['audio_profile.rule', 'DTS-HD']) rebulk.string('HR', 'HRA', value='High Resolution Audio', tags=['audio_profile.rule', 'DTS-HD'])
rebulk.string('ES', value='Extended Surround', tags=['audio_profile.rule', 'DTS']) rebulk.string('ES', value='Extended Surround', tags=['audio_profile.rule', 'DTS'])
@ -70,17 +73,19 @@ def audio_codec(config): # pylint:disable=unused-argument
rebulk.string('HQ', value='High Quality', tags=['audio_profile.rule', 'Dolby Digital']) rebulk.string('HQ', value='High Quality', tags=['audio_profile.rule', 'Dolby Digital'])
rebulk.string('EX', value='EX', tags=['audio_profile.rule', 'Dolby Digital']) rebulk.string('EX', value='EX', tags=['audio_profile.rule', 'Dolby Digital'])
rebulk.defaults(name="audio_channels", disabled=lambda context: is_disabled(context, 'audio_channels')) rebulk.defaults(clear=True,
rebulk.regex(r'(7[\W_][01](?:ch)?)(?=[^\d]|$)', value='7.1', children=True) name="audio_channels",
rebulk.regex(r'(5[\W_][01](?:ch)?)(?=[^\d]|$)', value='5.1', children=True) disabled=lambda context: is_disabled(context, 'audio_channels'))
rebulk.regex(r'(2[\W_]0(?:ch)?)(?=[^\d]|$)', value='2.0', children=True)
rebulk.regex('7[01]', value='7.1', validator=seps_after, tags='weak-audio_channels') rebulk.regex('7[01]', value='7.1', validator=seps_after, tags='weak-audio_channels')
rebulk.regex('5[01]', value='5.1', validator=seps_after, tags='weak-audio_channels') rebulk.regex('5[01]', value='5.1', validator=seps_after, tags='weak-audio_channels')
rebulk.string('20', value='2.0', validator=seps_after, tags='weak-audio_channels') rebulk.string('20', value='2.0', validator=seps_after, tags='weak-audio_channels')
rebulk.string('7ch', '8ch', value='7.1')
rebulk.string('5ch', '6ch', value='5.1') for value, items in config.get('audio_channels').items():
rebulk.string('2ch', 'stereo', value='2.0') for item in items:
rebulk.string('1ch', 'mono', value='1.0') if item.startswith('re:'):
rebulk.regex(item[3:], value=value, children=True)
else:
rebulk.string(item, value=value)
rebulk.rules(DtsHDRule, DtsRule, AacRule, DolbyDigitalRule, AudioValidatorRule, HqConflictRule, rebulk.rules(DtsHDRule, DtsRule, AacRule, DolbyDigitalRule, AudioValidatorRule, HqConflictRule,
AudioChannelsValidatorRule) AudioChannelsValidatorRule)
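`clear=True` is new with rebulk 2.0: successive `defaults()` calls appear to merge into the earlier ones, so the flag is needed to start fresh when one builder moves from property to property. A sketch under that assumption:

```python
from rebulk import Rebulk

rebulk = Rebulk()
rebulk.defaults(name='audio_codec', tags=['weak'])
# Without clear=True, the next call would presumably keep 'tags' around:
rebulk.defaults(clear=True, name='audio_profile')
rebulk.string('MA', value='Master Audio')  # inherits name='audio_profile' only
```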

View file

@ -69,4 +69,6 @@ class BitRateTypeRule(Rule):
else: else:
to_rename.append(match) to_rename.append(match)
if to_rename or to_remove:
return to_rename, to_remove return to_rename, to_remove
return False

View file

@ -26,7 +26,8 @@ def bonus(config): # pylint:disable=unused-argument
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE) rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
rebulk.regex(r'x(\d+)', name='bonus', private_parent=True, children=True, formatter=int, rebulk.regex(r'x(\d+)', name='bonus', private_parent=True, children=True, formatter=int,
validator={'__parent__': lambda match: seps_surround}, validator={'__parent__': seps_surround},
validate_all=True,
conflict_solver=lambda match, conflicting: match conflict_solver=lambda match, conflicting: match
if conflicting.name in ('video_codec', 'episode') and 'weak-episode' not in conflicting.tags if conflicting.name in ('video_codec', 'episode') and 'weak-episode' not in conflicting.tags
else '__default__') else '__default__')
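The old validator was a lambda returning `seps_surround` itself, a function object that is always truthy, so the parent match was never really validated; passing the validator directly (plus `validate_all=True`) fixes that. In miniature:

```python
def seps_surround_stub(match):   # invented stand-in for the real validator
    return False                 # pretend validation should fail

broken = lambda match: seps_surround_stub
assert bool(broken("x12"))                 # function object: always "valid"
assert seps_surround_stub("x12") is False  # direct call actually validates
```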

View file

@ -44,7 +44,8 @@ def container(config):
rebulk.regex(r'\.'+build_or_pattern(torrent)+'$', exts=torrent, tags=['extension', 'torrent']) rebulk.regex(r'\.'+build_or_pattern(torrent)+'$', exts=torrent, tags=['extension', 'torrent'])
rebulk.regex(r'\.'+build_or_pattern(nzb)+'$', exts=nzb, tags=['extension', 'nzb']) rebulk.regex(r'\.'+build_or_pattern(nzb)+'$', exts=nzb, tags=['extension', 'nzb'])
rebulk.defaults(name='container', rebulk.defaults(clear=True,
name='container',
validator=seps_surround, validator=seps_surround,
formatter=lambda s: s.lower(), formatter=lambda s: s.lower(),
conflict_solver=lambda match, other: match conflict_solver=lambda match, other: match

View file

@ -10,6 +10,7 @@ from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, RenameMatch, POST_PRO
from ..common import seps, title_seps from ..common import seps, title_seps
from ..common.formatters import cleanup from ..common.formatters import cleanup
from ..common.pattern import is_disabled from ..common.pattern import is_disabled
from ..common.validators import or_
from ..properties.title import TitleFromPosition, TitleBaseRule from ..properties.title import TitleFromPosition, TitleBaseRule
from ..properties.type import TypeProcessor from ..properties.type import TypeProcessor
@ -133,8 +134,7 @@ class EpisodeTitleFromPosition(TitleBaseRule):
def hole_filter(self, hole, matches): def hole_filter(self, hole, matches):
episode = matches.previous(hole, episode = matches.previous(hole,
lambda previous: any(name in previous.names lambda previous: previous.named(*self.previous_names),
for name in self.previous_names),
0) 0)
crc32 = matches.named('crc32') crc32 = matches.named('crc32')
@ -179,8 +179,7 @@ class AlternativeTitleReplace(Rule):
predicate=lambda match: 'title' in match.tags, index=0) predicate=lambda match: 'title' in match.tags, index=0)
if main_title: if main_title:
episode = matches.previous(main_title, episode = matches.previous(main_title,
lambda previous: any(name in previous.names lambda previous: previous.named(*self.previous_names),
for name in self.previous_names),
0) 0)
crc32 = matches.named('crc32') crc32 = matches.named('crc32')
@ -249,7 +248,7 @@ class Filepart3EpisodeTitle(Rule):
if season: if season:
hole = matches.holes(subdirectory.start, subdirectory.end, hole = matches.holes(subdirectory.start, subdirectory.end,
ignore=lambda match: 'weak-episode' in match.tags, ignore=or_(lambda match: 'weak-episode' in match.tags, TitleBaseRule.is_ignored),
formatter=cleanup, seps=title_seps, predicate=lambda match: match.value, formatter=cleanup, seps=title_seps, predicate=lambda match: match.value,
index=0) index=0)
if hole: if hole:
@ -292,7 +291,8 @@ class Filepart2EpisodeTitle(Rule):
season = (matches.range(directory.start, directory.end, lambda match: match.name == 'season', 0) or season = (matches.range(directory.start, directory.end, lambda match: match.name == 'season', 0) or
matches.range(filename.start, filename.end, lambda match: match.name == 'season', 0)) matches.range(filename.start, filename.end, lambda match: match.name == 'season', 0))
if season: if season:
hole = matches.holes(directory.start, directory.end, ignore=lambda match: 'weak-episode' in match.tags, hole = matches.holes(directory.start, directory.end,
ignore=or_(lambda match: 'weak-episode' in match.tags, TitleBaseRule.is_ignored),
formatter=cleanup, seps=title_seps, formatter=cleanup, seps=title_seps,
predicate=lambda match: match.value, index=0) predicate=lambda match: match.value, index=0)
if hole: if hole:

View file

@ -11,12 +11,13 @@ from rebulk.match import Match
from rebulk.remodule import re from rebulk.remodule import re
from rebulk.utils import is_iterable from rebulk.utils import is_iterable
from guessit.rules import match_processors
from guessit.rules.common.numeral import parse_numeral, numeral
from .title import TitleFromPosition from .title import TitleFromPosition
from ..common import dash, alt_dash, seps, seps_no_fs from ..common import dash, alt_dash, seps, seps_no_fs
from ..common.formatters import strip from ..common.formatters import strip
from ..common.numeral import numeral, parse_numeral
from ..common.pattern import is_disabled from ..common.pattern import is_disabled
from ..common.validators import compose, seps_surround, seps_before, int_coercable from ..common.validators import seps_surround, int_coercable, and_
from ...reutils import build_or_pattern from ...reutils import build_or_pattern
@ -29,17 +30,12 @@ def episodes(config):
:return: Created Rebulk object :return: Created Rebulk object
:rtype: Rebulk :rtype: Rebulk
""" """
# pylint: disable=too-many-branches,too-many-statements,too-many-locals # pylint: disable=too-many-branches,too-many-statements,too-many-locals
def is_season_episode_disabled(context): def is_season_episode_disabled(context):
"""Whether season and episode rules should be enabled.""" """Whether season and episode rules should be enabled."""
return is_disabled(context, 'episode') or is_disabled(context, 'season') return is_disabled(context, 'episode') or is_disabled(context, 'season')
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'])
episode_max_range = config['episode_max_range']
season_max_range = config['season_max_range']
def episodes_season_chain_breaker(matches): def episodes_season_chain_breaker(matches):
""" """
Break chains if there's more than 100 offset between two neighbor values. Break chains if there's more than 100 offset between two neighbor values.
@ -57,8 +53,6 @@ def episodes(config):
return True return True
return False return False
rebulk.chain_defaults(chain_breaker=episodes_season_chain_breaker)
def season_episode_conflict_solver(match, other): def season_episode_conflict_solver(match, other):
""" """
Conflict solver for episode/season patterns Conflict solver for episode/season patterns
@ -76,7 +70,6 @@ def episodes(config):
if (other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags if (other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags
and not match.initiator.children.named(match.name + 'Marker')) or ( and not match.initiator.children.named(match.name + 'Marker')) or (
other.name == 'screen_size' and not int_coercable(other.raw)): other.name == 'screen_size' and not int_coercable(other.raw)):
return match return match
if other.name in ('season', 'episode') and match.initiator != other.initiator: if other.name in ('season', 'episode') and match.initiator != other.initiator:
if (match.initiator.name in ('weak_episode', 'weak_duplicate') if (match.initiator.name in ('weak_episode', 'weak_duplicate')
@ -87,21 +80,6 @@ def episodes(config):
return current return current
return '__default__' return '__default__'
season_words = config['season_words']
episode_words = config['episode_words']
of_words = config['of_words']
all_words = config['all_words']
season_markers = config['season_markers']
season_ep_markers = config['season_ep_markers']
disc_markers = config['disc_markers']
episode_markers = config['episode_markers']
range_separators = config['range_separators']
weak_discrete_separators = list(sep for sep in seps_no_fs if sep not in range_separators)
strong_discrete_separators = config['discrete_separators']
discrete_separators = strong_discrete_separators + weak_discrete_separators
max_range_gap = config['max_range_gap']
def ordering_validator(match): def ordering_validator(match):
""" """
Validator for season list. They should be in natural order to be validated. Validator for season list. They should be in natural order to be validated.
@ -135,6 +113,7 @@ def episodes(config):
lambda m: m.name == property_name + 'Separator') lambda m: m.name == property_name + 'Separator')
separator = match.children.previous(current_match, separator = match.children.previous(current_match,
lambda m: m.name == property_name + 'Separator', 0) lambda m: m.name == property_name + 'Separator', 0)
if separator:
if separator.raw not in range_separators and separator.raw in weak_discrete_separators: if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
if not 0 < current_match.value - previous_match.value <= max_range_gap + 1: if not 0 < current_match.value - previous_match.value <= max_range_gap + 1:
valid = False valid = False
@ -146,54 +125,6 @@ def episodes(config):
return is_consecutive('episode') and is_consecutive('season') return is_consecutive('episode') and is_consecutive('season')
# S01E02, 01x02, S01S02S03
rebulk.chain(formatter={'season': int, 'episode': int},
tags=['SxxExx'],
abbreviations=[alt_dash],
children=True,
private_parent=True,
validate_all=True,
validator={'__parent__': ordering_validator},
conflict_solver=season_episode_conflict_solver,
disabled=is_season_episode_disabled) \
.regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)@?' +
build_or_pattern(episode_markers + disc_markers, name='episodeMarker') + r'@?(?P<episode>\d+)',
validate_all=True,
validator={'__parent__': seps_before}).repeater('+') \
.regex(build_or_pattern(episode_markers + disc_markers + discrete_separators + range_separators,
name='episodeSeparator',
escape=True) +
r'(?P<episode>\d+)').repeater('*') \
.chain() \
.regex(r'(?P<season>\d+)@?' +
build_or_pattern(season_ep_markers, name='episodeMarker') +
r'@?(?P<episode>\d+)',
validate_all=True,
validator={'__parent__': seps_before}) \
.chain() \
.regex(r'(?P<season>\d+)@?' +
build_or_pattern(season_ep_markers, name='episodeMarker') +
r'@?(?P<episode>\d+)',
validate_all=True,
validator={'__parent__': seps_before}) \
.regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
name='episodeSeparator',
escape=True) +
r'(?P<episode>\d+)').repeater('*') \
.chain() \
.regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)',
validate_all=True,
validator={'__parent__': seps_before}) \
.regex(build_or_pattern(season_markers + discrete_separators + range_separators,
name='seasonSeparator',
escape=True) +
r'(?P<season>\d+)').repeater('*')
# episode_details property
for episode_detail in ('Special', 'Pilot', 'Unaired', 'Final'):
rebulk.string(episode_detail, value=episode_detail, name='episode_details',
disabled=lambda context: is_disabled(context, 'episode_details'))
def validate_roman(match): def validate_roman(match):
""" """
Validate a roman match if surrounded by separators Validate a roman match if surrounded by separators
@ -206,117 +137,203 @@ def episodes(config):
return True return True
return seps_surround(match) return seps_surround(match)
season_words = config['season_words']
episode_words = config['episode_words']
of_words = config['of_words']
all_words = config['all_words']
season_markers = config['season_markers']
season_ep_markers = config['season_ep_markers']
disc_markers = config['disc_markers']
episode_markers = config['episode_markers']
range_separators = config['range_separators']
weak_discrete_separators = list(sep for sep in seps_no_fs if sep not in range_separators)
strong_discrete_separators = config['discrete_separators']
discrete_separators = strong_discrete_separators + weak_discrete_separators
episode_max_range = config['episode_max_range']
season_max_range = config['season_max_range']
max_range_gap = config['max_range_gap']
rebulk = Rebulk() \
.regex_defaults(flags=re.IGNORECASE) \
.string_defaults(ignore_case=True) \
.chain_defaults(chain_breaker=episodes_season_chain_breaker) \
.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
formatter={'season': int, 'episode': int, 'version': int, 'count': int},
children=True,
private_parent=True,
conflict_solver=season_episode_conflict_solver,
abbreviations=[alt_dash])
# S01E02, 01x02, S01S02S03
rebulk.chain(
tags=['SxxExx'],
validate_all=True,
validator={'__parent__': and_(seps_surround, ordering_validator)},
disabled=is_season_episode_disabled) \
.defaults(tags=['SxxExx']) \
.regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)@?' +
build_or_pattern(episode_markers + disc_markers, name='episodeMarker') + r'@?(?P<episode>\d+)')\
.repeater('+') \
.regex(build_or_pattern(episode_markers + disc_markers + discrete_separators + range_separators,
name='episodeSeparator',
escape=True) +
r'(?P<episode>\d+)').repeater('*')
rebulk.chain(tags=['SxxExx'],
validate_all=True,
validator={'__parent__': and_(seps_surround, ordering_validator)},
disabled=is_season_episode_disabled) \
.defaults(tags=['SxxExx']) \
.regex(r'(?P<season>\d+)@?' +
build_or_pattern(season_ep_markers, name='episodeMarker') +
r'@?(?P<episode>\d+)').repeater('+')
rebulk.chain(tags=['SxxExx'],
validate_all=True,
validator={'__parent__': and_(seps_surround, ordering_validator)},
disabled=is_season_episode_disabled) \
.defaults(tags=['SxxExx']) \
.regex(r'(?P<season>\d+)@?' +
build_or_pattern(season_ep_markers, name='episodeMarker') +
r'@?(?P<episode>\d+)') \
.regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
name='episodeSeparator',
escape=True) +
r'(?P<episode>\d+)').repeater('*')
rebulk.chain(tags=['SxxExx'],
validate_all=True,
validator={'__parent__': and_(seps_surround, ordering_validator)},
disabled=is_season_episode_disabled) \
.defaults(tags=['SxxExx']) \
.regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)') \
.regex('(?P<other>Extras)', name='other', value='Extras', tags=['no-release-group-prefix']).repeater('?') \
.regex(build_or_pattern(season_markers + discrete_separators + range_separators,
name='seasonSeparator',
escape=True) +
r'(?P<season>\d+)').repeater('*')
# episode_details property
for episode_detail in ('Special', 'Pilot', 'Unaired', 'Final'):
rebulk.string(episode_detail,
private_parent=False,
children=False,
value=episode_detail,
name='episode_details',
disabled=lambda context: is_disabled(context, 'episode_details'))
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'], rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True, validate_all=True,
validator={'__parent__': and_(seps_surround, ordering_validator)},
children=True,
private_parent=True,
conflict_solver=season_episode_conflict_solver) conflict_solver=season_episode_conflict_solver)
rebulk.chain(abbreviations=[alt_dash], rebulk.chain(validate_all=True,
conflict_solver=season_episode_conflict_solver,
formatter={'season': parse_numeral, 'count': parse_numeral}, formatter={'season': parse_numeral, 'count': parse_numeral},
validator={'__parent__': compose(seps_surround, ordering_validator), validator={'__parent__': and_(seps_surround, ordering_validator),
'season': validate_roman, 'season': validate_roman,
'count': validate_roman}, 'count': validate_roman},
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'season')) \ disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'season')) \
.defaults(validator=None) \ .defaults(formatter={'season': parse_numeral, 'count': parse_numeral},
validator={'season': validate_roman, 'count': validate_roman},
conflict_solver=season_episode_conflict_solver) \
.regex(build_or_pattern(season_words, name='seasonMarker') + '@?(?P<season>' + numeral + ')') \ .regex(build_or_pattern(season_words, name='seasonMarker') + '@?(?P<season>' + numeral + ')') \
.regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \ .regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
.regex(r'@?' + build_or_pattern(range_separators + discrete_separators + ['@'], .regex(r'@?' + build_or_pattern(range_separators + discrete_separators + ['@'],
name='seasonSeparator', escape=True) + name='seasonSeparator', escape=True) +
r'@?(?P<season>\d+)').repeater('*') r'@?(?P<season>\d+)').repeater('*')
rebulk.defaults(abbreviations=[dash])
rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>\d+)' + rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>\d+)' +
r'(?:v(?P<version>\d+))?' + r'(?:v(?P<version>\d+))?' +
r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?', # Episode 4 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?', # Episode 4
abbreviations=[dash], formatter={'episode': int, 'version': int, 'count': int},
disabled=lambda context: context.get('type') == 'episode' or is_disabled(context, 'episode')) disabled=lambda context: context.get('type') == 'episode' or is_disabled(context, 'episode'))
rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>' + numeral + ')' + rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>' + numeral + ')' +
r'(?:v(?P<version>\d+))?' + r'(?:v(?P<version>\d+))?' +
r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?', # Episode 4 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?', # Episode 4
abbreviations=[dash],
validator={'episode': validate_roman}, validator={'episode': validate_roman},
formatter={'episode': parse_numeral, 'version': int, 'count': int}, formatter={'episode': parse_numeral},
disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode')) disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode'))
rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')', rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')',
tags=['SxxExx'], tags=['SxxExx'],
abbreviations=[dash], formatter={'other': lambda match: 'Complete'},
validator=None,
formatter={'season': int, 'other': lambda match: 'Complete'},
disabled=lambda context: is_disabled(context, 'season')) disabled=lambda context: is_disabled(context, 'season'))
# 12, 13 # 12, 13
rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int}, rebulk.chain(tags=['weak-episode'],
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \ disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
.defaults(validator=None) \ .defaults(validator=None, tags=['weak-episode']) \
.regex(r'(?P<episode>\d{2})') \ .regex(r'(?P<episode>\d{2})') \
.regex(r'v(?P<version>\d+)').repeater('?') \ .regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*') .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})', abbreviations=None).repeater('*')
# 012, 013 # 012, 013
rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int}, rebulk.chain(tags=['weak-episode'],
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \ disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
.defaults(validator=None) \ .defaults(validator=None, tags=['weak-episode']) \
.regex(r'0(?P<episode>\d{1,2})') \ .regex(r'0(?P<episode>\d{1,2})') \
.regex(r'v(?P<version>\d+)').repeater('?') \ .regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})').repeater('*') .regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})', abbreviations=None).repeater('*')
# 112, 113 # 112, 113
rebulk.chain(tags=['weak-episode'], rebulk.chain(tags=['weak-episode'],
formatter={'episode': int, 'version': int},
name='weak_episode', name='weak_episode',
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \ disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
.defaults(validator=None) \ .defaults(validator=None, tags=['weak-episode'], name='weak_episode') \
.regex(r'(?P<episode>\d{3,4})') \ .regex(r'(?P<episode>\d{3,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \ .regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})').repeater('*') .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})', abbreviations=None).repeater('*')
# 1, 2, 3 # 1, 2, 3
rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int}, rebulk.chain(tags=['weak-episode'],
disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode')) \ disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode')) \
.defaults(validator=None) \ .defaults(validator=None, tags=['weak-episode']) \
.regex(r'(?P<episode>\d)') \ .regex(r'(?P<episode>\d)') \
.regex(r'v(?P<version>\d+)').repeater('?') \ .regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})').repeater('*') .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})', abbreviations=None).repeater('*')
# e112, e113, 1e18, 3e19 # e112, e113, 1e18, 3e19
# TODO: Enhance rebulk for validator to be used globally (season_episode_validator) rebulk.chain(disabled=lambda context: is_disabled(context, 'episode')) \
rebulk.chain(formatter={'season': int, 'episode': int, 'version': int},
disabled=lambda context: is_disabled(context, 'episode')) \
.defaults(validator=None) \ .defaults(validator=None) \
.regex(r'(?P<season>\d{1,2})?(?P<episodeMarker>e)(?P<episode>\d{1,4})') \ .regex(r'(?P<season>\d{1,2})?(?P<episodeMarker>e)(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \ .regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*') .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})', abbreviations=None).repeater('*')
# ep 112, ep113, ep112, ep113 # ep 112, ep113, ep112, ep113
rebulk.chain(abbreviations=[dash], formatter={'episode': int, 'version': int}, rebulk.chain(disabled=lambda context: is_disabled(context, 'episode')) \
disabled=lambda context: is_disabled(context, 'episode')) \
.defaults(validator=None) \ .defaults(validator=None) \
.regex(r'ep-?(?P<episode>\d{1,4})') \ .regex(r'ep-?(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \ .regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})').repeater('*') .regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})', abbreviations=None).repeater('*')
# cap 112, cap 112_114 # cap 112, cap 112_114
rebulk.chain(abbreviations=[dash], rebulk.chain(tags=['see-pattern'],
tags=['see-pattern'],
formatter={'season': int, 'episode': int},
disabled=is_season_episode_disabled) \ disabled=is_season_episode_disabled) \
.defaults(validator=None) \ .defaults(validator=None, tags=['see-pattern']) \
.regex(r'(?P<seasonMarker>cap)-?(?P<season>\d{1,2})(?P<episode>\d{2})') \ .regex(r'(?P<seasonMarker>cap)-?(?P<season>\d{1,2})(?P<episode>\d{2})') \
.regex(r'(?P<episodeSeparator>-)(?P<season>\d{1,2})(?P<episode>\d{2})').repeater('?') .regex(r'(?P<episodeSeparator>-)(?P<season>\d{1,2})(?P<episode>\d{2})').repeater('?')
# 102, 0102 # 102, 0102
rebulk.chain(tags=['weak-episode', 'weak-duplicate'], rebulk.chain(tags=['weak-episode', 'weak-duplicate'],
formatter={'season': int, 'episode': int, 'version': int},
name='weak_duplicate', name='weak_duplicate',
conflict_solver=season_episode_conflict_solver, conflict_solver=season_episode_conflict_solver,
disabled=lambda context: (context.get('episode_prefer_number', False) or disabled=lambda context: (context.get('episode_prefer_number', False) or
context.get('type') == 'movie') or is_season_episode_disabled(context)) \ context.get('type') == 'movie') or is_season_episode_disabled(context)) \
.defaults(validator=None) \ .defaults(tags=['weak-episode', 'weak-duplicate'],
name='weak_duplicate',
validator=None,
conflict_solver=season_episode_conflict_solver) \
.regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \ .regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \
.regex(r'v(?P<version>\d+)').repeater('?') \ .regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})').repeater('*') .regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})', abbreviations=None).repeater('*')
rebulk.regex(r'v(?P<version>\d+)', children=True, private_parent=True, formatter=int, rebulk.regex(r'v(?P<version>\d+)',
formatter=int,
disabled=lambda context: is_disabled(context, 'version')) disabled=lambda context: is_disabled(context, 'version'))
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator']) rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])
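The restructured chains are easiest to sanity-check from the outside; the expected values below follow guessit's documented SxxExx behaviour:

```python
from guessit import guessit

info = guessit('Show.Name.S02E05.720p.HDTV.x264-GROUP.mkv')
assert info['season'] == 2
assert info['episode'] == 5
```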
@ -325,18 +342,23 @@ def episodes(config):
# detached of X count (season/episode) # detached of X count (season/episode)
rebulk.regex(r'(?P<episode>\d+)-?' + build_or_pattern(of_words) + rebulk.regex(r'(?P<episode>\d+)-?' + build_or_pattern(of_words) +
r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?', r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
abbreviations=[dash], children=True, private_parent=True, formatter=int, formatter=int,
pre_match_processor=match_processors.strip,
disabled=lambda context: is_disabled(context, 'episode')) disabled=lambda context: is_disabled(context, 'episode'))
rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode", rebulk.regex(r'Minisodes?',
children=False,
private_parent=False,
name='episode_format',
value="Minisode",
disabled=lambda context: is_disabled(context, 'episode_format')) disabled=lambda context: is_disabled(context, 'episode_format'))
rebulk.rules(WeakConflictSolver, RemoveInvalidSeason, RemoveInvalidEpisode, rebulk.rules(WeakConflictSolver, RemoveInvalidSeason, RemoveInvalidEpisode,
SeePatternRange(range_separators + ['_']), SeePatternRange(range_separators + ['_']),
EpisodeNumberSeparatorRange(range_separators), EpisodeNumberSeparatorRange(range_separators),
SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx, SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx, RemoveWeakDuplicate,
RemoveWeakDuplicate, EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator, EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator, RemoveWeak(episode_words),
RemoveWeak, RenameToAbsoluteEpisode, CountValidator, EpisodeSingleDigitValidator, RenameToDiscMatch) RenameToAbsoluteEpisode, CountValidator, EpisodeSingleDigitValidator, RenameToDiscMatch)
return rebulk return rebulk
@ -416,7 +438,9 @@ class WeakConflictSolver(Rule):
if to_append: if to_append:
to_remove.extend(weak_dup_matches) to_remove.extend(weak_dup_matches)
if to_remove or to_append:
return to_remove, to_append return to_remove, to_append
return False
class CountValidator(Rule): class CountValidator(Rule):
@ -442,7 +466,9 @@ class CountValidator(Rule):
season_count.append(count) season_count.append(count)
else: else:
to_remove.append(count) to_remove.append(count)
if to_remove or episode_count or season_count:
return to_remove, episode_count, season_count return to_remove, episode_count, season_count
return False
class SeePatternRange(Rule): class SeePatternRange(Rule):
@ -477,7 +503,9 @@ class SeePatternRange(Rule):
to_remove.append(separator) to_remove.append(separator)
if to_remove or to_append:
return to_remove, to_append return to_remove, to_append
return False
class AbstractSeparatorRange(Rule): class AbstractSeparatorRange(Rule):
@ -533,7 +561,9 @@ class AbstractSeparatorRange(Rule):
previous_match = next_match previous_match = next_match
if to_remove or to_append:
return to_remove, to_append return to_remove, to_append
return False
class RenameToAbsoluteEpisode(Rule): class RenameToAbsoluteEpisode(Rule):
@ -629,20 +659,41 @@ class RemoveWeak(Rule):
Remove weak-episode matches which appears after video, source, and audio matches. Remove weak-episode matches which appears after video, source, and audio matches.
""" """
priority = 16 priority = 16
consequence = RemoveMatch consequence = RemoveMatch, AppendMatch
def __init__(self, episode_words):
super(RemoveWeak, self).__init__()
self.episode_words = episode_words
def when(self, matches, context): def when(self, matches, context):
to_remove = [] to_remove = []
to_append = []
for filepart in matches.markers.named('path'): for filepart in matches.markers.named('path'):
weaks = matches.range(filepart.start, filepart.end, predicate=lambda m: 'weak-episode' in m.tags) weaks = matches.range(filepart.start, filepart.end, predicate=lambda m: 'weak-episode' in m.tags)
if weaks: if weaks:
previous = matches.previous(weaks[0], predicate=lambda m: m.name in ( weak = weaks[0]
previous = matches.previous(weak, predicate=lambda m: m.name in (
'audio_codec', 'screen_size', 'streaming_service', 'source', 'video_profile', 'audio_codec', 'screen_size', 'streaming_service', 'source', 'video_profile',
'audio_channels', 'audio_profile'), index=0) 'audio_channels', 'audio_profile'), index=0)
if previous and not matches.holes( if previous and not matches.holes(
previous.end, weaks[0].start, predicate=lambda m: m.raw.strip(seps)): previous.end, weak.start, predicate=lambda m: m.raw.strip(seps)):
if previous.raw.lower() in self.episode_words:
try:
episode = copy.copy(weak)
episode.name = 'episode'
episode.value = int(weak.value)
episode.start = previous.start
episode.private = False
episode.tags = []
to_append.append(episode)
except ValueError:
pass
to_remove.extend(weaks) to_remove.extend(weaks)
return to_remove if to_remove or to_append:
return to_remove, to_append
return False
class RemoveWeakIfSxxExx(Rule): class RemoveWeakIfSxxExx(Rule):
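The new branch promotes a weak numeric match into a real episode when it directly follows one of the configured episode words. A stand-in sketch of that copy-and-rename step (`FakeMatch` is invented; rebulk's `Match` exposes the same attributes):

```python
import copy

class FakeMatch:
    """Minimal stand-in for rebulk.match.Match."""
    def __init__(self, name, value, start):
        self.name, self.value, self.start = name, value, start
        self.private, self.tags = True, ['weak-episode']

weak = FakeMatch('weak_episode', '112', 20)
episode = copy.copy(weak)
episode.name, episode.value = 'episode', int(weak.value)
episode.private, episode.tags = False, []
assert (episode.name, episode.value) == ('episode', 112)
assert weak.name == 'weak_episode'  # the original is left untouched
```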
@ -856,4 +907,6 @@ class RenameToDiscMatch(Rule):
markers.append(marker) markers.append(marker)
discs.extend(sorted(marker.initiator.children.named('episode'), key=lambda m: m.value)) discs.extend(sorted(marker.initiator.children.named('episode'), key=lambda m: m.value))
if discs or markers or to_remove:
return discs, markers, to_remove return discs, markers, to_remove
return False

View file

@ -72,6 +72,8 @@ def language(config, common_words):
UNDETERMINED = babelfish.Language('und') UNDETERMINED = babelfish.Language('und')
MULTIPLE = babelfish.Language('mul')
NON_SPECIFIC_LANGUAGES = frozenset([UNDETERMINED, MULTIPLE])
class GuessitConverter(babelfish.LanguageReverseConverter): # pylint: disable=missing-docstring class GuessitConverter(babelfish.LanguageReverseConverter): # pylint: disable=missing-docstring
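`und` and `mul` are ISO 639-3 special codes that babelfish exposes directly, which is what makes the two new constants cheap to build:

```python
import babelfish

assert babelfish.Language('und').name == 'Undetermined'
assert babelfish.Language('mul').name == 'Multiple languages'
```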
@ -388,7 +390,9 @@ class SubtitlePrefixLanguageRule(Rule):
to_remove.extend(matches.conflicting(lang)) to_remove.extend(matches.conflicting(lang))
if prefix in to_remove: if prefix in to_remove:
to_remove.remove(prefix) to_remove.remove(prefix)
if to_rename or to_remove:
return to_rename, to_remove return to_rename, to_remove
return False
def then(self, matches, when_response, context): def then(self, matches, when_response, context):
to_rename, to_remove = when_response to_rename, to_remove = when_response
@ -425,7 +429,9 @@ class SubtitleSuffixLanguageRule(Rule):
to_append.append(lang) to_append.append(lang)
if suffix in to_remove: if suffix in to_remove:
to_remove.remove(suffix) to_remove.remove(suffix)
if to_append or to_remove:
return to_append, to_remove return to_append, to_remove
return False
def then(self, matches, when_response, context): def then(self, matches, when_response, context):
to_rename, to_remove = when_response to_rename, to_remove = when_response
@ -478,6 +484,7 @@ class RemoveInvalidLanguages(Rule):
"""Remove language matches that matches the blacklisted common words.""" """Remove language matches that matches the blacklisted common words."""
consequence = RemoveMatch consequence = RemoveMatch
priority = 32
def __init__(self, common_words): def __init__(self, common_words):
"""Constructor.""" """Constructor."""

View file

@ -11,7 +11,7 @@ from rebulk.remodule import re
from ..common import dash from ..common import dash
from ..common import seps from ..common import seps
from ..common.pattern import is_disabled from ..common.pattern import is_disabled
from ..common.validators import seps_after, seps_before, seps_surround, compose from ..common.validators import seps_after, seps_before, seps_surround, and_
from ...reutils import build_or_pattern from ...reutils import build_or_pattern
from ...rules.common.formatters import raw_cleanup from ...rules.common.formatters import raw_cleanup
@ -35,11 +35,16 @@ def other(config): # pylint:disable=unused-argument,too-many-statements
rebulk.regex('ws', 'wide-?screen', value='Widescreen') rebulk.regex('ws', 'wide-?screen', value='Widescreen')
rebulk.regex('Re-?Enc(?:oded)?', value='Reencoded') rebulk.regex('Re-?Enc(?:oded)?', value='Reencoded')
rebulk.string('Proper', 'Repack', 'Rerip', value='Proper', rebulk.string('Repack', 'Rerip', value='Proper',
tags=['streaming_service.prefix', 'streaming_service.suffix']) tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.string('Proper', value='Proper',
tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix'])
rebulk.regex('Real-Proper', 'Real-Repack', 'Real-Rerip', value='Proper', rebulk.regex('Real-Proper', 'Real-Repack', 'Real-Rerip', value='Proper',
tags=['streaming_service.prefix', 'streaming_service.suffix', 'real']) tags=['streaming_service.prefix', 'streaming_service.suffix', 'real'])
rebulk.regex('Real', value='Proper',
tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix', 'real'])
rebulk.string('Fix', 'Fixed', value='Fix', tags=['has-neighbor-before', 'has-neighbor-after', rebulk.string('Fix', 'Fixed', value='Fix', tags=['has-neighbor-before', 'has-neighbor-after',
'streaming_service.prefix', 'streaming_service.suffix']) 'streaming_service.prefix', 'streaming_service.suffix'])
rebulk.string('Dirfix', 'Nfofix', 'Prooffix', value='Fix', rebulk.string('Dirfix', 'Nfofix', 'Prooffix', value='Fix',
@ -72,16 +77,18 @@ def other(config): # pylint:disable=unused-argument,too-many-statements
private_names=['completeArticle', 'completeWordsBefore', 'completeWordsAfter'], private_names=['completeArticle', 'completeWordsBefore', 'completeWordsAfter'],
value={'other': 'Complete'}, value={'other': 'Complete'},
tags=['release-group-prefix'], tags=['release-group-prefix'],
validator={'__parent__': compose(seps_surround, validate_complete)}) validator={'__parent__': and_(seps_surround, validate_complete)})
rebulk.string('R5', value='Region 5') rebulk.string('R5', value='Region 5')
rebulk.string('RC', value='Region C') rebulk.string('RC', value='Region C')
rebulk.regex('Pre-?Air', value='Preair') rebulk.regex('Pre-?Air', value='Preair')
rebulk.regex('(?:PS-?)?Vita', value='PS Vita') rebulk.regex('(?:PS-?)Vita', value='PS Vita')
rebulk.regex('Vita', value='PS Vita', tags='has-neighbor')
rebulk.regex('(HD)(?P<another>Rip)', value={'other': 'HD', 'another': 'Rip'}, rebulk.regex('(HD)(?P<another>Rip)', value={'other': 'HD', 'another': 'Rip'},
private_parent=True, children=True, validator={'__parent__': seps_surround}, validate_all=True) private_parent=True, children=True, validator={'__parent__': seps_surround}, validate_all=True)
for value in ('Screener', 'Remux', '3D', 'PAL', 'SECAM', 'NTSC', 'XXX'): for value in ('Screener', 'Remux', 'PAL', 'SECAM', 'NTSC', 'XXX'):
rebulk.string(value, value=value) rebulk.string(value, value=value)
rebulk.string('3D', value='3D', tags='has-neighbor')
rebulk.string('HQ', value='High Quality', tags='uhdbluray-neighbor') rebulk.string('HQ', value='High Quality', tags='uhdbluray-neighbor')
rebulk.string('HR', value='High Resolution') rebulk.string('HR', value='High Resolution')
@ -90,6 +97,7 @@ def other(config): # pylint:disable=unused-argument,too-many-statements
rebulk.string('mHD', 'HDLight', value='Micro HD') rebulk.string('mHD', 'HDLight', value='Micro HD')
rebulk.string('LDTV', value='Low Definition') rebulk.string('LDTV', value='Low Definition')
rebulk.string('HFR', value='High Frame Rate') rebulk.string('HFR', value='High Frame Rate')
rebulk.string('VFR', value='Variable Frame Rate')
rebulk.string('HD', value='HD', validator=None, rebulk.string('HD', value='HD', validator=None,
tags=['streaming_service.prefix', 'streaming_service.suffix']) tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.regex('Full-?HD', 'FHD', value='Full HD', validator=None, rebulk.regex('Full-?HD', 'FHD', value='Full HD', validator=None,
@ -128,13 +136,15 @@ def other(config): # pylint:disable=unused-argument,too-many-statements
rebulk.regex('BT-?2020', value='BT.2020', tags='uhdbluray-neighbor') rebulk.regex('BT-?2020', value='BT.2020', tags='uhdbluray-neighbor')
rebulk.string('Sample', value='Sample', tags=['at-end', 'not-a-release-group']) rebulk.string('Sample', value='Sample', tags=['at-end', 'not-a-release-group'])
rebulk.string('Extras', value='Extras', tags='has-neighbor')
rebulk.regex('Digital-?Extras?', value='Extras')
rebulk.string('Proof', value='Proof', tags=['at-end', 'not-a-release-group']) rebulk.string('Proof', value='Proof', tags=['at-end', 'not-a-release-group'])
rebulk.string('Obfuscated', 'Scrambled', value='Obfuscated', tags=['at-end', 'not-a-release-group']) rebulk.string('Obfuscated', 'Scrambled', value='Obfuscated', tags=['at-end', 'not-a-release-group'])
rebulk.string('xpost', 'postbot', 'asrequested', value='Repost', tags='not-a-release-group') rebulk.string('xpost', 'postbot', 'asrequested', value='Repost', tags='not-a-release-group')
rebulk.rules(RenameAnotherToOther, ValidateHasNeighbor, ValidateHasNeighborAfter, ValidateHasNeighborBefore, rebulk.rules(RenameAnotherToOther, ValidateHasNeighbor, ValidateHasNeighborAfter, ValidateHasNeighborBefore,
ValidateScreenerRule, ValidateMuxRule, ValidateHardcodedSubs, ValidateStreamingServiceNeighbor, ValidateScreenerRule, ValidateMuxRule, ValidateHardcodedSubs, ValidateStreamingServiceNeighbor,
ValidateAtEnd, ProperCountRule) ValidateAtEnd, ValidateReal, ProperCountRule)
return rebulk return rebulk
@ -354,3 +364,20 @@ class ValidateAtEnd(Rule):
to_remove.append(match) to_remove.append(match)
return to_remove return to_remove
class ValidateReal(Rule):
"""
Validate Real
"""
consequence = RemoveMatch
priority = 64
def when(self, matches, context):
ret = []
for filepart in matches.markers.named('path'):
for match in matches.range(filepart.start, filepart.end, lambda m: m.name == 'other' and 'real' in m.tags):
if not matches.range(filepart.start, match.start):
ret.append(match)
return ret

View file

@ -8,7 +8,7 @@ from rebulk.remodule import re
from rebulk import Rebulk from rebulk import Rebulk
from ..common import dash from ..common import dash
from ..common.pattern import is_disabled from ..common.pattern import is_disabled
from ..common.validators import seps_surround, int_coercable, compose from ..common.validators import seps_surround, int_coercable, and_
from ..common.numeral import numeral, parse_numeral from ..common.numeral import numeral, parse_numeral
from ...reutils import build_or_pattern from ...reutils import build_or_pattern
@ -41,6 +41,6 @@ def part(config): # pylint:disable=unused-argument
rebulk.regex(build_or_pattern(prefixes) + r'-?(?P<part>' + numeral + r')', rebulk.regex(build_or_pattern(prefixes) + r'-?(?P<part>' + numeral + r')',
prefixes=prefixes, validate_all=True, private_parent=True, children=True, formatter=parse_numeral, prefixes=prefixes, validate_all=True, private_parent=True, children=True, formatter=parse_numeral,
validator={'part': compose(validate_roman, lambda m: 0 < m.value < 100)}) validator={'part': and_(validate_roman, lambda m: 0 < m.value < 100)})
return rebulk return rebulk

View file

@ -9,8 +9,8 @@ from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch
from rebulk.match import Match from rebulk.match import Match
from ..common import seps from ..common import seps
from ..common.expected import build_expected_function
from ..common.comparators import marker_sorted from ..common.comparators import marker_sorted
from ..common.expected import build_expected_function
from ..common.formatters import cleanup from ..common.formatters import cleanup
from ..common.pattern import is_disabled from ..common.pattern import is_disabled
from ..common.validators import int_coercable, seps_surround from ..common.validators import int_coercable, seps_surround
@ -50,7 +50,7 @@ def release_group(config):
if string.lower().endswith(forbidden) and string[-len(forbidden) - 1:-len(forbidden)] in seps: if string.lower().endswith(forbidden) and string[-len(forbidden) - 1:-len(forbidden)] in seps:
string = string[:len(forbidden)] string = string[:len(forbidden)]
string = string.strip(groupname_seps) string = string.strip(groupname_seps)
return string return string.strip()
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'release_group')) rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'release_group'))
@ -72,7 +72,9 @@ _scene_previous_names = ('video_codec', 'source', 'video_api', 'audio_codec', 'a
'audio_channels', 'screen_size', 'other', 'container', 'language', 'subtitle_language', 'audio_channels', 'screen_size', 'other', 'container', 'language', 'subtitle_language',
'subtitle_language.suffix', 'subtitle_language.prefix', 'language.suffix') 'subtitle_language.suffix', 'subtitle_language.prefix', 'language.suffix')
_scene_previous_tags = ('release-group-prefix', ) _scene_previous_tags = ('release-group-prefix',)
_scene_no_previous_tags = ('no-release-group-prefix',)
class DashSeparatedReleaseGroup(Rule): class DashSeparatedReleaseGroup(Rule):
@ -193,6 +195,7 @@ class DashSeparatedReleaseGroup(Rule):
if releasegroup.value: if releasegroup.value:
to_append.append(releasegroup) to_append.append(releasegroup)
if to_remove or to_append:
return to_remove, to_append return to_remove, to_append
@ -212,6 +215,17 @@ class SceneReleaseGroup(Rule):
super(SceneReleaseGroup, self).__init__() super(SceneReleaseGroup, self).__init__()
self.value_formatter = value_formatter self.value_formatter = value_formatter
@staticmethod
def is_previous_match(match):
"""
Check if match can precede release_group
:param match:
:return:
"""
return not match.tagged(*_scene_no_previous_tags) if match.name in _scene_previous_names else \
match.tagged(*_scene_previous_tags)
def when(self, matches, context): # pylint:disable=too-many-locals def when(self, matches, context): # pylint:disable=too-many-locals
# If a release_group is found before, ignore this kind of release_group rule. # If a release_group is found before, ignore this kind of release_group rule.
@ -253,13 +267,12 @@ class SceneReleaseGroup(Rule):
if match.start < filepart.start: if match.start < filepart.start:
return False return False
return not match.private or match.name in _scene_previous_names return not match.private or self.is_previous_match(match)
previous_match = matches.previous(last_hole, previous_match = matches.previous(last_hole,
previous_match_filter, previous_match_filter,
index=0) index=0)
if previous_match and (previous_match.name in _scene_previous_names or if previous_match and (self.is_previous_match(previous_match)) and \
any(tag in previous_match.tags for tag in _scene_previous_tags)) and \
not matches.input_string[previous_match.end:last_hole.start].strip(seps) \ not matches.input_string[previous_match.end:last_hole.start].strip(seps) \
and not int_coercable(last_hole.value.strip(seps)): and not int_coercable(last_hole.value.strip(seps)):
@ -300,11 +313,11 @@ class AnimeReleaseGroup(Rule):
# If a release_group is found before, ignore this kind of release_group rule. # If a release_group is found before, ignore this kind of release_group rule.
if matches.named('release_group'): if matches.named('release_group'):
return to_remove, to_append return False
if not matches.named('episode') and not matches.named('season') and matches.named('release_group'): if not matches.named('episode') and not matches.named('season') and matches.named('release_group'):
# This doesn't seem to be an anime, and we already found another release_group. # This doesn't seem to be an anime, and we already found another release_group.
return to_remove, to_append return False
for filepart in marker_sorted(matches.markers.named('path'), matches): for filepart in marker_sorted(matches.markers.named('path'), matches):
@ -328,4 +341,7 @@ class AnimeReleaseGroup(Rule):
to_append.append(group) to_append.append(group)
to_remove.extend(matches.range(empty_group.start, empty_group.end, to_remove.extend(matches.range(empty_group.start, empty_group.end,
lambda m: 'weak-language' in m.tags)) lambda m: 'weak-language' in m.tags))
if to_remove or to_append:
return to_remove, to_append return to_remove, to_append
return False
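The release group rules above now return False instead of a pair of empty lists when they have nothing to change. A minimal sketch of that convention, assuming the rebulk engine treats a falsy return from when() as a signal to skip the rule's consequences:

from rebulk import Rule, RemoveMatch, AppendMatch

class SketchRule(Rule):
    """Illustrates the return-False-when-idle convention used above."""
    consequence = [RemoveMatch, AppendMatch]

    def when(self, matches, context):
        to_remove, to_append = [], []
        # ... inspect matches and fill the two lists here ...
        if to_remove or to_append:
            return to_remove, to_append
        return False  # nothing to do, so the consequences can be skipped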

View file

@ -24,8 +24,8 @@ def screen_size(config):
:return: Created Rebulk object :return: Created Rebulk object
:rtype: Rebulk :rtype: Rebulk
""" """
interlaced = frozenset({res for res in config['interlaced']}) interlaced = frozenset(config['interlaced'])
progressive = frozenset({res for res in config['progressive']}) progressive = frozenset(config['progressive'])
frame_rates = [re.escape(rate) for rate in config['frame_rates']] frame_rates = [re.escape(rate) for rate in config['frame_rates']]
min_ar = config['min_ar'] min_ar = config['min_ar']
max_ar = config['max_ar'] max_ar = config['max_ar']

View file

@ -12,7 +12,7 @@ from rebulk import AppendMatch, Rebulk, RemoveMatch, Rule
from .audio_codec import HqConflictRule from .audio_codec import HqConflictRule
from ..common import dash, seps from ..common import dash, seps
from ..common.pattern import is_disabled from ..common.pattern import is_disabled
from ..common.validators import seps_before, seps_after from ..common.validators import seps_before, seps_after, or_
def source(config): # pylint:disable=unused-argument def source(config): # pylint:disable=unused-argument
@ -26,7 +26,10 @@ def source(config): # pylint:disable=unused-argument
""" """
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'source')) rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'source'))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash], private_parent=True, children=True) rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash], private_parent=True, children=True)
rebulk.defaults(name='source', tags=['video-codec-prefix', 'streaming_service.suffix']) rebulk = rebulk.defaults(name='source',
tags=['video-codec-prefix', 'streaming_service.suffix'],
validate_all=True,
validator={'__parent__': or_(seps_before, seps_after)})
rip_prefix = '(?P<other>Rip)-?' rip_prefix = '(?P<other>Rip)-?'
rip_suffix = '-?(?P<other>Rip)' rip_suffix = '-?(?P<other>Rip)'
@ -42,7 +45,7 @@ def source(config): # pylint:disable=unused-argument
def demote_other(match, other): # pylint: disable=unused-argument def demote_other(match, other): # pylint: disable=unused-argument
"""Default conflict solver with 'other' property.""" """Default conflict solver with 'other' property."""
return other if other.name == 'other' else '__default__' return other if other.name == 'other' or other.name == 'release_group' else '__default__'
rebulk.regex(*build_source_pattern('VHS', suffix=rip_optional_suffix), rebulk.regex(*build_source_pattern('VHS', suffix=rip_optional_suffix),
value={'source': 'VHS', 'other': 'Rip'}) value={'source': 'VHS', 'other': 'Rip'})
@ -92,8 +95,9 @@ def source(config): # pylint:disable=unused-argument
# WEBCap is a synonym for WEBRip, mostly used by non-English # WEBCap is a synonym for WEBRip, mostly used by non-English
rebulk.regex(*build_source_pattern('WEB-?(?P<another>Cap)', suffix=rip_optional_suffix), rebulk.regex(*build_source_pattern('WEB-?(?P<another>Cap)', suffix=rip_optional_suffix),
value={'source': 'Web', 'other': 'Rip', 'another': 'Rip'}) value={'source': 'Web', 'other': 'Rip', 'another': 'Rip'})
rebulk.regex(*build_source_pattern('WEB-?DL', 'WEB-?U?HD', 'WEB', 'DL-?WEB', 'DL(?=-?Mux)'), rebulk.regex(*build_source_pattern('WEB-?DL', 'WEB-?U?HD', 'DL-?WEB', 'DL(?=-?Mux)'),
value={'source': 'Web'}) value={'source': 'Web'})
rebulk.regex('(WEB)', value='Web', tags='weak.source')
rebulk.regex(*build_source_pattern('HD-?DVD', suffix=rip_optional_suffix), rebulk.regex(*build_source_pattern('HD-?DVD', suffix=rip_optional_suffix),
value={'source': 'HD-DVD', 'other': 'Rip'}) value={'source': 'HD-DVD', 'other': 'Rip'})
@ -118,7 +122,7 @@ def source(config): # pylint:disable=unused-argument
rebulk.regex(*build_source_pattern('DSR?', 'SAT', suffix=rip_suffix), rebulk.regex(*build_source_pattern('DSR?', 'SAT', suffix=rip_suffix),
value={'source': 'Satellite', 'other': 'Rip'}) value={'source': 'Satellite', 'other': 'Rip'})
rebulk.rules(ValidateSource, UltraHdBlurayRule) rebulk.rules(ValidateSourcePrefixSuffix, ValidateWeakSource, UltraHdBlurayRule)
return rebulk return rebulk
@ -170,19 +174,22 @@ class UltraHdBlurayRule(Rule):
to_remove.append(match) to_remove.append(match)
to_append.append(new_source) to_append.append(new_source)
if to_remove or to_append:
return to_remove, to_append return to_remove, to_append
return False
class ValidateSource(Rule): class ValidateSourcePrefixSuffix(Rule):
""" """
Validate source with screener property, with video_codec property or separated Validate source matches that have a source prefix or a source suffix.
""" """
priority = 64 priority = 64
consequence = RemoveMatch consequence = RemoveMatch
def when(self, matches, context): def when(self, matches, context):
ret = [] ret = []
for match in matches.named('source'): for filepart in matches.markers.named('path'):
for match in matches.range(filepart.start, filepart.end, predicate=lambda m: m.name == 'source'):
match = match.initiator match = match.initiator
if not seps_before(match) and \ if not seps_before(match) and \
not matches.range(match.start - 1, match.start - 2, not matches.range(match.start - 1, match.start - 2,
@ -198,4 +205,31 @@ class ValidateSource(Rule):
ret.extend(match.children) ret.extend(match.children)
ret.append(match) ret.append(match)
continue continue
return ret
class ValidateWeakSource(Rule):
"""
Validate weak source
"""
dependency = [ValidateSourcePrefixSuffix]
priority = 64
consequence = RemoveMatch
def when(self, matches, context):
ret = []
for filepart in matches.markers.named('path'):
for match in matches.range(filepart.start, filepart.end, predicate=lambda m: m.name == 'source'):
# if there is more than one source in this filepart, sitting just before the year and with holes for the title,
# the source is most likely part of the title
if 'weak.source' in match.tags \
and matches.range(match.end, filepart.end, predicate=lambda m: m.name == 'source') \
and matches.holes(filepart.start, match.start,
predicate=lambda m: m.value.strip(seps), index=-1):
if match.children:
ret.extend(match.children)
ret.append(match)
continue
return ret return ret
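The new '__parent__' validator leans on an any-of counterpart to and_, under the same assumption that validators are match predicates:

def or_(*validators):
    """Accept a match if at least one of the given validators accepts it."""
    def validator(match):
        return any(v(match) for v in validators)
    return validator

# e.g. validator={'__parent__': or_(seps_before, seps_after)} keeps a source
# match as long as a separator sits on at least one side of it.

The bare 'WEB' pattern is also split out and tagged 'weak.source', so that ValidateWeakSource can drop it when another source follows in the same filepart and a title-like hole precedes it.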

View file

@ -25,133 +25,13 @@ def streaming_service(config): # pylint: disable=too-many-statements,unused-arg
rebulk = rebulk.string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]) rebulk = rebulk.string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
rebulk.defaults(name='streaming_service', tags=['source-prefix']) rebulk.defaults(name='streaming_service', tags=['source-prefix'])
rebulk.string('AE', 'A&E', value='A&E') for value, items in config.items():
rebulk.string('AMBC', value='ABC') patterns = items if isinstance(items, list) else [items]
rebulk.string('AUBC', value='ABC Australia') for pattern in patterns:
rebulk.string('AJAZ', value='Al Jazeera English') if pattern.startswith('re:'):
rebulk.string('AMC', value='AMC') rebulk.regex(pattern, value=value)
rebulk.string('AMZN', 'Amazon', value='Amazon Prime') else:
rebulk.regex('Amazon-?Prime', value='Amazon Prime') rebulk.string(pattern, value=value)
rebulk.string('AS', value='Adult Swim')
rebulk.regex('Adult-?Swim', value='Adult Swim')
rebulk.string('ATK', value="America's Test Kitchen")
rebulk.string('ANPL', value='Animal Planet')
rebulk.string('ANLB', value='AnimeLab')
rebulk.string('AOL', value='AOL')
rebulk.string('ARD', value='ARD')
rebulk.string('iP', value='BBC iPlayer')
rebulk.regex('BBC-?iPlayer', value='BBC iPlayer')
rebulk.string('BRAV', value='BravoTV')
rebulk.string('CNLP', value='Canal+')
rebulk.string('CN', value='Cartoon Network')
rebulk.string('CBC', value='CBC')
rebulk.string('CBS', value='CBS')
rebulk.string('CNBC', value='CNBC')
rebulk.string('CC', value='Comedy Central')
rebulk.string('4OD', value='Channel 4')
rebulk.string('CHGD', value='CHRGD')
rebulk.string('CMAX', value='Cinemax')
rebulk.string('CMT', value='Country Music Television')
rebulk.regex('Comedy-?Central', value='Comedy Central')
rebulk.string('CCGC', value='Comedians in Cars Getting Coffee')
rebulk.string('CR', value='Crunchy Roll')
rebulk.string('CRKL', value='Crackle')
rebulk.regex('Crunchy-?Roll', value='Crunchy Roll')
rebulk.string('CSPN', value='CSpan')
rebulk.string('CTV', value='CTV')
rebulk.string('CUR', value='CuriosityStream')
rebulk.string('CWS', value='CWSeed')
rebulk.string('DSKI', value='Daisuki')
rebulk.string('DHF', value='Deadhouse Films')
rebulk.string('DDY', value='Digiturk Diledigin Yerde')
rebulk.string('DISC', 'Discovery', value='Discovery')
rebulk.string('DSNY', 'Disney', value='Disney')
rebulk.string('DIY', value='DIY Network')
rebulk.string('DOCC', value='Doc Club')
rebulk.string('DPLY', value='DPlay')
rebulk.string('ETV', value='E!')
rebulk.string('EPIX', value='ePix')
rebulk.string('ETTV', value='El Trece')
rebulk.string('ESPN', value='ESPN')
rebulk.string('ESQ', value='Esquire')
rebulk.string('FAM', value='Family')
rebulk.string('FJR', value='Family Jr')
rebulk.string('FOOD', value='Food Network')
rebulk.string('FOX', value='Fox')
rebulk.string('FREE', value='Freeform')
rebulk.string('FYI', value='FYI Network')
rebulk.string('GLBL', value='Global')
rebulk.string('GLOB', value='GloboSat Play')
rebulk.string('HLMK', value='Hallmark')
rebulk.string('HBO', value='HBO Go')
rebulk.regex('HBO-?Go', value='HBO Go')
rebulk.string('HGTV', value='HGTV')
rebulk.string('HIST', 'History', value='History')
rebulk.string('HULU', value='Hulu')
rebulk.string('ID', value='Investigation Discovery')
rebulk.string('IFC', value='IFC')
rebulk.string('iTunes', 'iT', value='iTunes')
rebulk.string('ITV', value='ITV')
rebulk.string('KNOW', value='Knowledge Network')
rebulk.string('LIFE', value='Lifetime')
rebulk.string('MTOD', value='Motor Trend OnDemand')
rebulk.string('MNBC', value='MSNBC')
rebulk.string('MTV', value='MTV')
rebulk.string('NATG', value='National Geographic')
rebulk.regex('National-?Geographic', value='National Geographic')
rebulk.string('NBA', value='NBA TV')
rebulk.regex('NBA-?TV', value='NBA TV')
rebulk.string('NBC', value='NBC')
rebulk.string('NF', 'Netflix', value='Netflix')
rebulk.string('NFL', value='NFL')
rebulk.string('NFLN', value='NFL Now')
rebulk.string('GC', value='NHL GameCenter')
rebulk.string('NICK', 'Nickelodeon', value='Nickelodeon')
rebulk.string('NRK', value='Norsk Rikskringkasting')
rebulk.string('PBS', value='PBS')
rebulk.string('PBSK', value='PBS Kids')
rebulk.string('PSN', value='Playstation Network')
rebulk.string('PLUZ', value='Pluzz')
rebulk.string('RTE', value='RTE One')
rebulk.string('SBS', value='SBS (AU)')
rebulk.string('SESO', 'SeeSo', value='SeeSo')
rebulk.string('SHMI', value='Shomi')
rebulk.string('SPIK', value='Spike')
rebulk.string('SPKE', value='Spike TV')
rebulk.regex('Spike-?TV', value='Spike TV')
rebulk.string('SNET', value='Sportsnet')
rebulk.string('SPRT', value='Sprout')
rebulk.string('STAN', value='Stan')
rebulk.string('STZ', value='Starz')
rebulk.string('SVT', value='Sveriges Television')
rebulk.string('SWER', value='SwearNet')
rebulk.string('SYFY', value='Syfy')
rebulk.string('TBS', value='TBS')
rebulk.string('TFOU', value='TFou')
rebulk.string('CW', value='The CW')
rebulk.regex('The-?CW', value='The CW')
rebulk.string('TLC', value='TLC')
rebulk.string('TUBI', value='TubiTV')
rebulk.string('TV3', value='TV3 Ireland')
rebulk.string('TV4', value='TV4 Sweeden')
rebulk.string('TVL', value='TV Land')
rebulk.regex('TV-?Land', value='TV Land')
rebulk.string('UFC', value='UFC')
rebulk.string('UKTV', value='UKTV')
rebulk.string('UNIV', value='Univision')
rebulk.string('USAN', value='USA Network')
rebulk.string('VLCT', value='Velocity')
rebulk.string('VH1', value='VH1')
rebulk.string('VICE', value='Viceland')
rebulk.string('VMEO', value='Vimeo')
rebulk.string('VRV', value='VRV')
rebulk.string('WNET', value='W Network')
rebulk.string('WME', value='WatchMe')
rebulk.string('WWEN', value='WWE Network')
rebulk.string('XBOX', value='Xbox Video')
rebulk.string('YHOO', value='Yahoo')
rebulk.string('RED', value='YouTube Red')
rebulk.string('ZDF', value='ZDF')
rebulk.rules(ValidateStreamingService) rebulk.rules(ValidateStreamingService)
@ -161,7 +41,7 @@ def streaming_service(config): # pylint: disable=too-many-statements,unused-arg
class ValidateStreamingService(Rule): class ValidateStreamingService(Rule):
"""Validate streaming service matches.""" """Validate streaming service matches."""
priority = 32 priority = 128
consequence = RemoveMatch consequence = RemoveMatch
def when(self, matches, context): def when(self, matches, context):
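The hard-coded registrations above become data-driven: each config key is a canonical service name, each value is one pattern or a list of patterns, and a 're:' prefix selects regex matching. A standalone sketch of that dispatch; build_matchers and the sample entries are illustrative (they mirror a few of the removed calls), and unlike the vendored loop, which hands the pattern to rebulk directly, this sketch strips the marker itself:

import re

def build_matchers(config):
    """Split config entries into compiled regexes and literal strings."""
    regexes, literals = [], []
    for value, items in config.items():
        patterns = items if isinstance(items, list) else [items]
        for pattern in patterns:
            if pattern.startswith('re:'):
                regexes.append((re.compile(pattern[3:], re.IGNORECASE), value))
            else:
                literals.append((pattern, value))
    return regexes, literals

regexes, literals = build_matchers({
    'Amazon Prime': ['AMZN', 'Amazon', 're:Amazon-?Prime'],
    'BBC iPlayer': ['iP', 're:BBC-?iPlayer'],
    'Netflix': ['NF', 'Netflix'],
})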

View file

@ -8,7 +8,12 @@ from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, AppendTags
from rebulk.formatters import formatters from rebulk.formatters import formatters
from .film import FilmTitleRule from .film import FilmTitleRule
from .language import SubtitlePrefixLanguageRule, SubtitleSuffixLanguageRule, SubtitleExtensionRule from .language import (
SubtitlePrefixLanguageRule,
SubtitleSuffixLanguageRule,
SubtitleExtensionRule,
NON_SPECIFIC_LANGUAGES
)
from ..common import seps, title_seps from ..common import seps, title_seps
from ..common.comparators import marker_sorted from ..common.comparators import marker_sorted
from ..common.expected import build_expected_function from ..common.expected import build_expected_function
@ -88,12 +93,19 @@ class TitleBaseRule(Rule):
:rtype: :rtype:
""" """
cropped_holes = [] cropped_holes = []
for hole in holes:
group_markers = matches.markers.named('group') group_markers = matches.markers.named('group')
for group_marker in group_markers:
path_marker = matches.markers.at_match(group_marker, predicate=lambda m: m.name == 'path', index=0)
if path_marker and path_marker.span == group_marker.span:
group_markers.remove(group_marker)
for hole in holes:
cropped_holes.extend(hole.crop(group_markers)) cropped_holes.extend(hole.crop(group_markers))
return cropped_holes return cropped_holes
def is_ignored(self, match): @staticmethod
def is_ignored(match):
""" """
Ignore matches when scanning for title (hole). Ignore matches when scanning for title (hole).
@ -130,7 +142,8 @@ class TitleBaseRule(Rule):
for outside in outside_matches: for outside in outside_matches:
other_languages.extend(matches.range(outside.start, outside.end, other_languages.extend(matches.range(outside.start, outside.end,
lambda c_match: c_match.name == match.name and lambda c_match: c_match.name == match.name and
c_match not in to_keep)) c_match not in to_keep and
c_match.value not in NON_SPECIFIC_LANGUAGES))
if not other_languages and (not starting or len(match.raw) <= 3): if not other_languages and (not starting or len(match.raw) <= 3):
return True return True
@ -239,7 +252,7 @@ class TitleBaseRule(Rule):
to_remove = [] to_remove = []
if matches.named(self.match_name, lambda match: 'expected' in match.tags): if matches.named(self.match_name, lambda match: 'expected' in match.tags):
return ret, to_remove return False
fileparts = [filepart for filepart in list(marker_sorted(matches.markers.named('path'), matches)) fileparts = [filepart for filepart in list(marker_sorted(matches.markers.named('path'), matches))
if not self.filepart_filter or self.filepart_filter(filepart, matches)] if not self.filepart_filter or self.filepart_filter(filepart, matches)]
@ -272,7 +285,9 @@ class TitleBaseRule(Rule):
ret.extend(titles) ret.extend(titles)
to_remove.extend(to_remove_c) to_remove.extend(to_remove_c)
if ret or to_remove:
return ret, to_remove return ret, to_remove
return False
class TitleFromPosition(TitleBaseRule): class TitleFromPosition(TitleBaseRule):
@ -329,4 +344,6 @@ class PreferTitleWithYear(Rule):
for title_match in titles: for title_match in titles:
if title_match.value not in title_values: if title_match.value not in title_values:
to_remove.append(title_match) to_remove.append(title_match)
if to_remove or to_tag:
return to_remove, to_tag return to_remove, to_tag
return False

View file

@ -3,9 +3,8 @@
""" """
video_codec and video_profile properties video_codec and video_profile properties
""" """
from rebulk.remodule import re
from rebulk import Rebulk, Rule, RemoveMatch from rebulk import Rebulk, Rule, RemoveMatch
from rebulk.remodule import re
from ..common import dash from ..common import dash
from ..common.pattern import is_disabled from ..common.pattern import is_disabled
@ -43,7 +42,8 @@ def video_codec(config): # pylint:disable=unused-argument
# http://blog.mediacoderhq.com/h264-profiles-and-levels/ # http://blog.mediacoderhq.com/h264-profiles-and-levels/
# https://en.wikipedia.org/wiki/H.264/MPEG-4_AVC # https://en.wikipedia.org/wiki/H.264/MPEG-4_AVC
rebulk.defaults(name="video_profile", rebulk.defaults(clear=True,
name="video_profile",
validator=seps_surround, validator=seps_surround,
disabled=lambda context: is_disabled(context, 'video_profile')) disabled=lambda context: is_disabled(context, 'video_profile'))
@ -66,7 +66,8 @@ def video_codec(config): # pylint:disable=unused-argument
rebulk.string('DXVA', value='DXVA', name='video_api', rebulk.string('DXVA', value='DXVA', name='video_api',
disabled=lambda context: is_disabled(context, 'video_api')) disabled=lambda context: is_disabled(context, 'video_api'))
rebulk.defaults(name='color_depth', rebulk.defaults(clear=True,
name='color_depth',
validator=seps_surround, validator=seps_surround,
disabled=lambda context: is_disabled(context, 'color_depth')) disabled=lambda context: is_disabled(context, 'color_depth'))
rebulk.regex('12.?bits?', value='12-bit') rebulk.regex('12.?bits?', value='12-bit')
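Both defaults() calls above gain clear=True. The new builder (builder.py, later in this diff) merges defaults from successive calls into one persistent dict, so a reset flag is needed when a single Rebulk instance registers several properties in sequence. An illustrative model of that behavior, not the vendored implementation:

class DefaultsHolder:
    """Toy model: merge defaults across calls unless clear=True resets them."""

    def __init__(self):
        self._defaults = {}

    def defaults(self, clear=False, **kwargs):
        if clear:
            self._defaults = {}  # drop defaults accumulated so far
        self._defaults.update(kwargs)
        return self

holder = DefaultsHolder()
holder.defaults(name='video_profile', validator=len)
holder.defaults(clear=True, name='color_depth')
assert holder._defaults == {'name': 'color_depth'}  # validator was dropped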

View file

@ -67,7 +67,7 @@ def website(config):
""" """
Validator for next website matches Validator for next website matches
""" """
return any(name in ['season', 'episode', 'year'] for name in match.names) return match.named('season', 'episode', 'year')
def when(self, matches, context): def when(self, matches, context):
to_remove = [] to_remove = []
@ -80,6 +80,8 @@ def website(config):
if not safe: if not safe:
suffix = matches.next(website_match, PreferTitleOverWebsite.valid_followers, 0) suffix = matches.next(website_match, PreferTitleOverWebsite.valid_followers, 0)
if suffix: if suffix:
group = matches.markers.at_match(website_match, lambda marker: marker.name == 'group', 0)
if not group:
to_remove.append(website_match) to_remove.append(website_match)
return to_remove return to_remove

View file

@ -35,9 +35,9 @@
-cd: 1 -cd: 1
-cd_count: 3 -cd_count: 3
? This.Is.Us ? This.is.Us
: options: --exclude country : options: --exclude country
title: This Is Us title: This is Us
-country: US -country: US
? 2015.01.31 ? 2015.01.31
@ -286,9 +286,9 @@
: options: --exclude website : options: --exclude website
-website: wawa.co.uk -website: wawa.co.uk
? movie.mkv ? movie.mp4
: options: --exclude mimetype : options: --exclude mimetype
-mimetype: video/x-matroska -mimetype: video/mp4
? another movie.mkv ? another movie.mkv
: options: --exclude container : options: --exclude container

View file

@ -201,9 +201,9 @@
? Series/My Name Is Earl/My.Name.Is.Earl.S01Extras.-.Bad.Karma.DVDRip.XviD.avi ? Series/My Name Is Earl/My.Name.Is.Earl.S01Extras.-.Bad.Karma.DVDRip.XviD.avi
: title: My Name Is Earl : title: My Name Is Earl
season: 1 season: 1
episode_title: Extras - Bad Karma episode_title: Bad Karma
source: DVD source: DVD
other: Rip other: [Extras, Rip]
video_codec: Xvid video_codec: Xvid
? series/Freaks And Geeks/Season 1/Episode 4 - Kim Kelly Is My Friend-eng(1).srt ? series/Freaks And Geeks/Season 1/Episode 4 - Kim Kelly Is My Friend-eng(1).srt
@ -1917,9 +1917,11 @@
? Duck.Dynasty.S02E07.Streik.German.DOKU.DL.WS.DVDRiP.x264-CDP ? Duck.Dynasty.S02E07.Streik.German.DOKU.DL.WS.DVDRiP.x264-CDP
: episode: 7 : episode: 7
episode_title: Streik German episode_title: Streik
source: DVD source: DVD
language: mul language:
- German
- Multi
other: [Documentary, Widescreen, Rip] other: [Documentary, Widescreen, Rip]
release_group: CDP release_group: CDP
season: 2 season: 2
@ -1930,9 +1932,11 @@
? Family.Guy.S13E14.JOLO.German.AC3D.DL.720p.WebHD.x264-CDD ? Family.Guy.S13E14.JOLO.German.AC3D.DL.720p.WebHD.x264-CDD
: audio_codec: Dolby Digital : audio_codec: Dolby Digital
episode: 14 episode: 14
episode_title: JOLO German episode_title: JOLO
source: Web source: Web
language: mul language:
- German
- Multi
release_group: CDD release_group: CDD
screen_size: 720p screen_size: 720p
season: 13 season: 13
@ -3025,7 +3029,7 @@
title: Show Name title: Show Name
episode: [493, 494, 495, 496, 497, 498, 500, 501, 502, 503, 504, 505, 506, 507] episode: [493, 494, 495, 496, 497, 498, 500, 501, 502, 503, 504, 505, 506, 507]
screen_size: 720p screen_size: 720p
subtitle_language: fr other: Variable Frame Rate
video_codec: H.264 video_codec: H.264
audio_codec: AAC audio_codec: AAC
type: episode type: episode
@ -4525,3 +4529,165 @@
audio_codec: MP2 audio_codec: MP2
release_group: KIDKAT release_group: KIDKAT
type: episode type: episode
? Por Trece Razones - Temporada 2 [HDTV 720p][Cap.201][AC3 5.1 Castellano]/Por Trece Razones 2x01 [des202].mkv
: title: Por Trece Razones
season: 2
source: HDTV
screen_size: 720p
episode: 1
audio_codec: Dolby Digital
audio_channels: '5.1'
language: Catalan
release_group: des202
container: mkv
type: episode
? Cuerpo de Elite - Temporada 1 [HDTV 720p][Cap.113][AC3 5.1 Esp Castellano]\CuerpoDeElite720p_113_desca202.mkv
: title: Cuerpo de Elite
season: 1
source: HDTV
screen_size: 720p
episode: 13
audio_codec: Dolby Digital
audio_channels: '5.1'
language:
- Spanish
- Catalan
container: mkv
type: episode
? Show.Name.S01E01.St.Patricks.Day.1080p.mkv
: title: Show Name
season: 1
episode: 1
episode_title: St Patricks Day
screen_size: 1080p
container: mkv
type: episode
? Show.Name.S01E01.St.Patricks.Day.1080p-grp.mkv
: title: Show Name
season: 1
episode: 1
episode_title: St Patricks Day
screen_size: 1080p
release_group: grp
container: mkv
type: episode
? Titans.2018.S01E09.Hank.And.Dawn.720p.DCU.WEB-DL.AAC2.0.H264-NTb
: title: Titans
year: 2018
season: 1
episode: 9
episode_title: Hank And Dawn
screen_size: 720p
streaming_service: DC Universe
source: Web
audio_codec: AAC
audio_channels: '2.0'
video_codec: H.264
release_group: NTb
type: episode
? S.W.A.T.2017.S01E21.Treibjagd.German.Dubbed.DL.AmazonHD.x264-TVS
: title: S.W.A.T.
year: 2017
season: 1
episode: 21
episode_title: Treibjagd
language:
- German
- Multi
streaming_service: Amazon Prime
other: HD
video_codec: H.264
release_group: TVS
type: episode
? S.W.A.T.2017.S01E16.READNFO.720p.HDTV.x264-KILLERS
: title: S.W.A.T.
year: 2017
season: 1
episode: 16
other: Read NFO
screen_size: 720p
source: HDTV
video_codec: H.264
release_group: KILLERS
type: episode
? /mnt/NAS/NoSubsTVShows/Babylon 5/Season 01/Ep. 02 - Soul Hunter
: title: Babylon 5
season: 1
episode: 2
episode_title: Soul Hunter
type: episode
? This.is.Us.S01E01.HDTV.x264-KILLERS.mkv
: title: This is Us
season: 1
episode: 1
source: HDTV
video_codec: H.264
release_group: KILLERS
container: mkv
type: episode
? Videos/Office1080/The Office (US) (2005) Season 2 S02 + Extras (1080p AMZN WEB-DL x265 HEVC 10bit AAC 2.0 LION)/The Office (US) (2005) - S02E12 - The Injury (1080p AMZN WEB-DL x265 LION).mkv
: title: The Office
country: US
year: 2005
season: 2
other: Extras
screen_size: 1080p
streaming_service: Amazon Prime
source: Web
video_codec: H.265
video_profile: High Efficiency Video Coding
color_depth: 10-bit
audio_codec: AAC
audio_channels: '2.0'
release_group: LION
episode: 12
episode_title: The Injury
container: mkv
type: episode
? Thumping.Spike.2.E01.DF.WEBRip.720p-DRAMATV.mp4
: title: Thumping Spike 2
episode: 1
source: Web
other: Rip
screen_size: 720p
streaming_service: DramaFever
release_group: DRAMATV
container: mp4
mimetype: video/mp4
type: episode
? About.Time.E01.1080p.VIKI.WEB-DL-BLUEBERRY.mp4
: title: About Time
episode: 1
screen_size: 1080p
streaming_service: Viki
source: Web
release_group: BLUEBERRY
container: mp4
mimetype: video/mp4
type: episode
? Eyes.Of.Dawn.1991.E01.480p.MBCVOD.AAC.x264-NOGPR.mp4
: title: Eyes Of Dawn
year: 1991
season: 1991
episode: 1
screen_size: 480p
streaming_service: MBC
audio_codec: AAC
video_codec: H.264
release_group: NOGPR
container: mp4
mimetype: video/mp4
type: episode

View file

@ -815,10 +815,12 @@
? Das.Appartement.German.AC3D.DL.720p.BluRay.x264-TVP ? Das.Appartement.German.AC3D.DL.720p.BluRay.x264-TVP
: audio_codec: Dolby Digital : audio_codec: Dolby Digital
source: Blu-ray source: Blu-ray
language: mul language:
- German
- Multi
release_group: TVP release_group: TVP
screen_size: 720p screen_size: 720p
title: Das Appartement German title: Das Appartement
type: movie type: movie
video_codec: H.264 video_codec: H.264
@ -1723,7 +1725,7 @@
? Ant-Man.and.the.Wasp.2018.Digital.Extras.1080p.AMZN.WEB-DL.DDP5.1.H.264-NTG.mkv ? Ant-Man.and.the.Wasp.2018.Digital.Extras.1080p.AMZN.WEB-DL.DDP5.1.H.264-NTG.mkv
: title: Ant-Man and the Wasp : title: Ant-Man and the Wasp
year: 2018 year: 2018
alternative_title: Digital Extras other: Extras
screen_size: 1080p screen_size: 1080p
streaming_service: Amazon Prime streaming_service: Amazon Prime
source: Web source: Web
@ -1771,3 +1773,14 @@
video_codec: H.264 video_codec: H.264
release_group: CMRG release_group: CMRG
type: movie type: movie
? The.Girl.in.the.Spiders.Web.2019.1080p.WEB-DL.x264.AC3-EVO.mkv
: title: The Girl in the Spiders Web
year: 2019
screen_size: 1080p
source: Web
video_codec: H.264
audio_codec: Dolby Digital
release_group: EVO
container: mkv
type: movie

View file

@ -0,0 +1,467 @@
? is
: title: is
? it
: title: it
? am
: title: am
? mad
: title: mad
? men
: title: men
? man
: title: man
? run
: title: run
? sin
: title: sin
? st
: title: st
? to
: title: to
? 'no'
: title: 'no'
? non
: title: non
? war
: title: war
? min
: title: min
? new
: title: new
? car
: title: car
? day
: title: day
? bad
: title: bad
? bat
: title: bat
? fan
: title: fan
? fry
: title: fry
? cop
: title: cop
? zen
: title: zen
? gay
: title: gay
? fat
: title: fat
? one
: title: one
? cherokee
: title: cherokee
? got
: title: got
? an
: title: an
? as
: title: as
? cat
: title: cat
? her
: title: her
? be
: title: be
? hat
: title: hat
? sun
: title: sun
? may
: title: may
? my
: title: my
? mr
: title: mr
? rum
: title: rum
? pi
: title: pi
? bb
: title: bb
? bt
: title: bt
? tv
: title: tv
? aw
: title: aw
? by
: title: by
? md
: other: Mic Dubbed
? mp
: title: mp
? cd
: title: cd
? in
: title: in
? ad
: title: ad
? ice
: title: ice
? ay
: title: ay
? at
: title: at
? star
: title: star
? so
: title: so
? he
: title: he
? do
: title: do
? ax
: title: ax
? mx
: title: mx
? bas
: title: bas
? de
: title: de
? le
: title: le
? son
: title: son
? ne
: title: ne
? ca
: title: ca
? ce
: title: ce
? et
: title: et
? que
: title: que
? mal
: title: mal
? est
: title: est
? vol
: title: vol
? or
: title: or
? mon
: title: mon
? se
: title: se
? je
: title: je
? tu
: title: tu
? me
: title: me
? ma
: title: ma
? va
: title: va
? au
: country: AU
? lu
: title: lu
? wa
: title: wa
? ga
: title: ga
? ao
: title: ao
? la
: title: la
? el
: title: el
? del
: title: del
? por
: title: por
? mar
: title: mar
? al
: title: al
? un
: title: un
? ind
: title: ind
? arw
: title: arw
? ts
: source: Telesync
? ii
: title: ii
? bin
: title: bin
? chan
: title: chan
? ss
: title: ss
? san
: title: san
? oss
: title: oss
? iii
: title: iii
? vi
: title: vi
? ben
: title: ben
? da
: title: da
? lt
: title: lt
? ch
: title: ch
? sr
: title: sr
? ps
: title: ps
? cx
: title: cx
? vo
: title: vo
? mkv
: container: mkv
? avi
: container: avi
? dmd
: title: dmd
? the
: title: the
? dis
: title: dis
? cut
: title: cut
? stv
: title: stv
? des
: title: des
? dia
: title: dia
? and
: title: and
? cab
: title: cab
? sub
: title: sub
? mia
: title: mia
? rim
: title: rim
? las
: title: las
? une
: title: une
? par
: title: par
? srt
: container: srt
? ano
: title: ano
? toy
: title: toy
? job
: title: job
? gag
: title: gag
? reel
: title: reel
? www
: title: www
? for
: title: for
? ayu
: title: ayu
? csi
: title: csi
? ren
: title: ren
? moi
: title: moi
? sur
: title: sur
? fer
: title: fer
? fun
: title: fun
? two
: title: two
? big
: title: big
? psy
: title: psy
? air
: title: air
? brazil
: title: brazil
? jordan
: title: jordan
? bs
: title: bs
? kz
: title: kz
? gt
: title: gt
? im
: title: im
? pt
: language: pt
? scr
: title: scr
? sd
: title: sd
? hr
: other: High Resolution

View file

@ -5,8 +5,8 @@
: country: US : country: US
title: this is title title: this is title
? This.is.us.title ? This.is.Us
: title: This is us title : title: This is Us
? This.Is.Us ? This.Is.Us
: options: --no-default-config : options: --no-default-config

View file

@ -48,7 +48,7 @@
proper_count: 3 proper_count: 3
? Proper ? Proper.720p
? +Repack ? +Repack
? +Rerip ? +Rerip
: other: Proper : other: Proper
@ -80,7 +80,7 @@
? Remux ? Remux
: other: Remux : other: Remux
? 3D ? 3D.2019
: other: 3D : other: 3D
? HD ? HD

View file

@ -0,0 +1,21 @@
{
"titles": [
"13 Reasons Why",
"Star Wars: Episode VII - The Force Awakens",
"3%",
"The 100",
"3 Percent",
"This is Us",
"Open Season 2",
"Game of Thrones",
"The X-Files",
"11.22.63"
],
"suggested": [
"13 Reasons Why",
"Star Wars: Episode VII - The Force Awakens",
"The 100",
"Open Season 2",
"11.22.63"
]
}

View file

@ -1,13 +1,14 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name, pointless-string-statement # pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name, pointless-string-statement
import json
import os import os
import sys
import pytest import pytest
import six import six
from ..api import guessit, properties, GuessitException from ..api import guessit, properties, suggested_expected, GuessitException
__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))) __location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
@ -27,12 +28,16 @@ def test_forced_binary():
assert ret and 'title' in ret and isinstance(ret['title'], six.binary_type) assert ret and 'title' in ret and isinstance(ret['title'], six.binary_type)
@pytest.mark.skipif('sys.version_info < (3, 4)', reason="Path is not available") @pytest.mark.skipif(sys.version_info < (3, 4), reason="Path is not available")
def test_pathlike_object(): def test_pathlike_object():
try:
from pathlib import Path from pathlib import Path
path = Path('Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv') path = Path('Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv')
ret = guessit(path) ret = guessit(path)
assert ret and 'title' in ret assert ret and 'title' in ret
except ImportError: # pragma: no-cover
pass
def test_unicode_japanese(): def test_unicode_japanese():
@ -69,3 +74,10 @@ def test_exception():
assert "An internal error has occured in guessit" in str(excinfo.value) assert "An internal error has occured in guessit" in str(excinfo.value)
assert "Guessit Exception Report" in str(excinfo.value) assert "Guessit Exception Report" in str(excinfo.value)
assert "Please report at https://github.com/guessit-io/guessit/issues" in str(excinfo.value) assert "Please report at https://github.com/guessit-io/guessit/issues" in str(excinfo.value)
def test_suggested_expected():
with open(os.path.join(__location__, 'suggested.json'), 'r') as f:
content = json.load(f)
actual = suggested_expected(content['titles'])
assert actual == content['suggested']
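Judging by the suggested.json fixture, suggested_expected() filters a list of titles down to those worth passing back as expected titles, that is, names guessit would likely mis-parse, such as dates or bare numbers. A usage sketch:

from guessit.api import suggested_expected

titles = ['This is Us', 'The 100', '11.22.63']
print(suggested_expected(titles))
# Per the fixture above, this keeps 'The 100' and '11.22.63'
# but not 'This is Us'.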

View file

@ -7,9 +7,8 @@ import os
from io import open # pylint: disable=redefined-builtin from io import open # pylint: disable=redefined-builtin
import babelfish import babelfish
import pytest import six # pylint:disable=wrong-import-order
import six import yaml # pylint:disable=wrong-import-order
import yaml
from rebulk.remodule import re from rebulk.remodule import re
from rebulk.utils import is_iterable from rebulk.utils import is_iterable
@ -21,13 +20,6 @@ logger = logging.getLogger(__name__)
__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))) __location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
filename_predicate = None
string_predicate = None
# filename_predicate = lambda filename: 'episode_title' in filename
# string_predicate = lambda string: '-DVD.BlablaBla.Fix.Blablabla.XVID' in string
class EntryResult(object): class EntryResult(object):
def __init__(self, string, negates=False): def __init__(self, string, negates=False):
@ -134,21 +126,24 @@ class TestYml(object):
options_re = re.compile(r'^([ +-]+)(.*)') options_re = re.compile(r'^([ +-]+)(.*)')
files, ids = files_and_ids(filename_predicate) def _get_unique_id(self, collection, base_id):
ret = base_id
i = 2
while ret in collection:
suffix = "-" + str(i)
ret = base_id + suffix
i += 1
return ret
@staticmethod def pytest_generate_tests(self, metafunc):
def set_default(expected, default): if 'yml_test_case' in metafunc.fixturenames:
if default: entries = []
for k, v in default.items(): entry_ids = []
if k not in expected: entry_set = set()
expected[k] = v
@pytest.mark.parametrize('filename', files, ids=ids) for filename, _ in zip(*files_and_ids()):
def test(self, filename, caplog):
caplog.set_level(logging.INFO)
with open(os.path.join(__location__, filename), 'r', encoding='utf-8') as infile: with open(os.path.join(__location__, filename), 'r', encoding='utf-8') as infile:
data = yaml.load(infile, OrderedDictYAMLLoader) data = yaml.load(infile, OrderedDictYAMLLoader)
entries = Results()
last_expected = None last_expected = None
for string, expected in reversed(list(data.items())): for string, expected in reversed(list(data.items())):
@ -166,11 +161,24 @@ class TestYml(object):
for string, expected in data.items(): for string, expected in data.items():
TestYml.set_default(expected, default) TestYml.set_default(expected, default)
entry = self.check_data(filename, string, expected) string = TestYml.fix_encoding(string, expected)
entries.append(entry)
entries.assert_ok()
def check_data(self, filename, string, expected): entries.append((filename, string, expected))
unique_id = self._get_unique_id(entry_set, '[' + filename + '] ' + str(string))
entry_set.add(unique_id)
entry_ids.append(unique_id)
metafunc.parametrize('yml_test_case', entries, ids=entry_ids)
@staticmethod
def set_default(expected, default):
if default:
for k, v in default.items():
if k not in expected:
expected[k] = v
@classmethod
def fix_encoding(cls, string, expected):
if six.PY2: if six.PY2:
if isinstance(string, six.text_type): if isinstance(string, six.text_type):
string = string.encode('utf-8') string = string.encode('utf-8')
@ -183,7 +191,14 @@ class TestYml(object):
expected[k] = v expected[k] = v
if not isinstance(string, str): if not isinstance(string, str):
string = str(string) string = str(string)
if not string_predicate or string_predicate(string): # pylint: disable=not-callable return string
def test_entry(self, yml_test_case):
filename, string, expected = yml_test_case
result = self.check_data(filename, string, expected)
assert not result.error
def check_data(self, filename, string, expected):
entry = self.check(string, expected) entry = self.check(string, expected)
if entry.ok: if entry.ok:
logger.debug('[%s] %s', filename, entry) logger.debug('[%s] %s', filename, entry)
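The refactor replaces one monolithic test per YAML file with one pytest case per (file, string) pair, generated through the pytest_generate_tests hook with deduplicated ids. A minimal standalone version of that pattern:

def pytest_generate_tests(metafunc):
    """Parametrize any test that requests a 'yml_case' fixture."""
    if 'yml_case' in metafunc.fixturenames:
        cases = [
            ('episodes.yml', 'Show.S01E01.720p.HDTV.x264-GRP'),
            ('movies.yml', 'Movie.2019.1080p.BluRay.x264-GRP'),
        ]
        ids = ['[%s] %s' % (filename, string) for filename, string in cases]
        metafunc.parametrize('yml_case', cases, ids=ids)

def test_entry(yml_case):
    filename, string = yml_case
    assert filename.endswith('.yml') and string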

View file

@ -946,3 +946,254 @@
source: Blu-ray source: Blu-ray
audio_codec: DTS-HD audio_codec: DTS-HD
type: movie type: movie
? Mr Robot - S03E01 - eps3 0 power-saver-mode h (1080p AMZN WEB-DL x265 HEVC 10bit EAC3 6.0 RCVR).mkv
: title: Mr Robot
season: 3
episode: 1
episode_title: eps3 0 power-saver-mode h
screen_size: 1080p
streaming_service: Amazon Prime
source: Web
video_codec: H.265
video_profile: High Efficiency Video Coding
color_depth: 10-bit
audio_codec: Dolby Digital Plus
audio_channels: '5.1'
release_group: RCVR
container: mkv
type: episode
? Panorama.15-05-2018.Web-DL.540p.H264.AAC.Subs.mp4
: title: Panorama
date: 2018-05-15
source: Web
screen_size: 540p
video_codec: H.264
audio_codec: AAC
subtitle_language: und
container: mp4
type: episode
? Shaolin 2011.720p.BluRay.x264-x0r.mkv
: title: Shaolin
year: 2011
screen_size: 720p
source: Blu-ray
video_codec: H.264
release_group: x0r
container: mkv
type: movie
? '[ Engineering Catastrophes S02E10 1080p AMZN WEB-DL DD+ 2.0 x264-TrollHD ]'
: title: Engineering Catastrophes
season: 2
episode: 10
screen_size: 1080p
streaming_service: Amazon Prime
source: Web
audio_codec: Dolby Digital Plus
audio_channels: '2.0'
video_codec: H.264
release_group: TrollHD
type: episode
? A Very Harold & Kumar 3D Christmas (2011).mkv
: title: A Very Harold & Kumar 3D Christmas
year: 2011
container: mkv
type: movie
? Cleveland.Hustles.S01E03.Downward.Dogs.and.Proper.Pigs.720p.HDTV.x264-W4F
: title: Cleveland Hustles
season: 1
episode: 3
episode_title: Downward Dogs and Proper Pigs
screen_size: 720p
source: HDTV
video_codec: H.264
release_group: W4F
type: episode
? Pawn.Stars.S12E20.The.Pawn.Awakens.REAL.READ.NFO.720p.HDTV.x264-DHD
: title: Pawn Stars
season: 12
episode: 20
episode_title: The Pawn Awakens
other:
- Proper
- Read NFO
proper_count: 2
screen_size: 720p
source: HDTV
video_codec: H.264
release_group: DHD
type: episode
? Pawn.Stars.S12E22.Racing.Revolution.REAL.720p.HDTV.x264-DHD
: title: Pawn Stars
season: 12
episode: 22
episode_title: Racing Revolution
other: Proper
proper_count: 2
screen_size: 720p
source: HDTV
video_codec: H.264
release_group: DHD
type: episode
? Luksusfellen.S18E02.REAL.NORWEGiAN.720p.WEB.h264-NORPiLT
: title: Luksusfellen
season: 18
episode: 2
other: Proper
proper_count: 2
language: Norwegian
screen_size: 720p
source: Web
video_codec: H.264
release_group: NORPiLT
type: episode
? The.Exorcist.S02E07.REAL.FRENCH.720p.HDTV.x264-SH0W
: title: The Exorcist
season: 2
episode: 7
other: Proper
proper_count: 2
language: fr
screen_size: 720p
source: HDTV
video_codec: H.264
release_group: SH0W
type: episode
? Outrageous.Acts.of.Science.S05E02.Is.This.for.Real.720p.HDTV.x264-DHD
: title: Outrageous Acts of Science
season: 5
episode: 2
# corner case
# episode_title: Is This for Real
screen_size: 720p
source: HDTV
video_codec: H.264
release_group: DHD
type: episode
? How.the.Universe.Works.S06E08.Strange.Lives.of.Dwarf.Planets.REAL.720p.WEB.x264-DHD
: title: How the Universe Works
season: 6
episode: 8
episode_title: Strange Lives of Dwarf Planets
other: Proper
proper_count: 2
screen_size: 720p
source: Web
video_codec: H.264
release_group: DHD
type: episode
? Vampirina.S01E16.REAL.HDTV.x264-W4F
: title: Vampirina
season: 1
episode: 16
other: Proper
proper_count: 2
source: HDTV
video_codec: H.264
release_group: W4F
type: episode
? Test.S01E16.Some Real Episode Title.HDTV.x264-W4F
: title: Test
season: 1
episode: 16
episode_title: Some Real Episode Title
source: HDTV
video_codec: H.264
release_group: W4F
type: episode
? NOS4A2.S01E01.The.Shorter.Way.REPACK.720p.AMZN.WEB-DL.DDP5.1.H.264-NTG.mkv
: title: NOS4A2
season: 1
episode: 1
episode_title: The Shorter Way
other: Proper
proper_count: 1
screen_size: 720p
streaming_service: Amazon Prime
source: Web
audio_codec: Dolby Digital Plus
audio_channels: '5.1'
video_codec: H.264
release_group: NTG
container: mkv
type: episode
? Star Trek DS9 Ep 2x03 The Siege (Part III)
: title: Star Trek DS9
season: 2
episode: 3
episode_title: The Siege
part: 3
type: episode
? The.Red.Line.S01E01
: title: The Red Line
season: 1
episode: 1
type: episode
? Show.S01E01.WEB.x264-METCON.mkv
: title: Show
season: 1
episode: 1
source: Web
video_codec: H.264
release_group: METCON
container: mkv
type: episode
? Show.S01E01.WEB.x264-TCMEON.mkv
: title: Show
season: 1
episode: 1
source: Web
video_codec: H.264
release_group: TCMEON
container: mkv
type: episode
? Show.S01E01.WEB.x264-MEONTC.mkv
: title: Show
season: 1
episode: 1
source: Web
video_codec: H.264
release_group: MEONTC
container: mkv
type: episode
? '[TorrentCouch.com].Westworld.S02.Complete.720p.WEB-DL.x264.[MP4].[5.3GB].[Season.2.Full]/[TorrentCouch.com].Westworld.S02E03.720p.WEB-DL.x264.mp4'
: website: TorrentCouch.com
title: Westworld
season: 2
other: Complete
screen_size: 720p
source: Web
video_codec: H.264
container: mp4
size: 5.3GB
episode: 3
type: episode
? Vita.&.Virginia.2018.720p.H.264.YTS.LT.mp4
: title: Vita & Virginia
year: 2018
screen_size: 720p
video_codec: H.264
release_group: YTS.LT
container: mp4
type: movie

View file

@ -10,19 +10,19 @@ except ImportError: # pragma: no-cover
from ordereddict import OrderedDict # pylint:disable=import-error from ordereddict import OrderedDict # pylint:disable=import-error
import babelfish import babelfish
import yaml import yaml # pylint:disable=wrong-import-order
from .rules.common.quantity import BitRate, FrameRate, Size from .rules.common.quantity import BitRate, FrameRate, Size
class OrderedDictYAMLLoader(yaml.Loader): class OrderedDictYAMLLoader(yaml.SafeLoader):
""" """
A YAML loader that loads mappings into ordered dictionaries. A YAML loader that loads mappings into ordered dictionaries.
From https://gist.github.com/enaeseth/844388 From https://gist.github.com/enaeseth/844388
""" """
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
yaml.Loader.__init__(self, *args, **kwargs) yaml.SafeLoader.__init__(self, *args, **kwargs)
self.add_constructor(u'tag:yaml.org,2002:map', type(self).construct_yaml_map) self.add_constructor(u'tag:yaml.org,2002:map', type(self).construct_yaml_map)
self.add_constructor(u'tag:yaml.org,2002:omap', type(self).construct_yaml_map) self.add_constructor(u'tag:yaml.org,2002:omap', type(self).construct_yaml_map)
@ -58,7 +58,7 @@ class CustomDumper(yaml.SafeDumper):
""" """
Custom YAML Dumper. Custom YAML Dumper.
""" """
pass pass # pylint:disable=unnecessary-pass
def default_representer(dumper, data): def default_representer(dumper, data):
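Basing the loader on yaml.SafeLoader instead of yaml.Loader is a hardening change: PyYAML's plain Loader can construct arbitrary Python objects from !!python tags, while SafeLoader only builds plain data types. A quick demonstration:

import yaml

doc = '!!python/object/apply:os.getcwd []'
try:
    yaml.load(doc, Loader=yaml.SafeLoader)
except yaml.constructor.ConstructorError:
    print('SafeLoader refuses to build arbitrary Python objects')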

View file

@ -4,4 +4,4 @@
Version module Version module
""" """
# pragma: no cover # pragma: no cover
__version__ = '1.0.0' __version__ = '2.0.1'

View file

@ -0,0 +1,217 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Base builder class for Rebulk
"""
from abc import ABCMeta, abstractmethod
from copy import deepcopy
from logging import getLogger
from six import add_metaclass
from .loose import set_defaults
from .pattern import RePattern, StringPattern, FunctionalPattern
log = getLogger(__name__).log
@add_metaclass(ABCMeta)
class Builder(object):
"""
Base builder class for patterns
"""
def __init__(self):
self._defaults = {}
self._regex_defaults = {}
self._string_defaults = {}
self._functional_defaults = {}
self._chain_defaults = {}
def reset(self):
"""
Reset all defaults.
:return:
"""
self.__init__()
def defaults(self, **kwargs):
"""
Define default keyword arguments for all patterns
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(kwargs, self._defaults, override=True)
return self
def regex_defaults(self, **kwargs):
"""
Define default keyword arguments for regex patterns.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(kwargs, self._regex_defaults, override=True)
return self
def string_defaults(self, **kwargs):
"""
Define default keyword arguments for string patterns.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(kwargs, self._string_defaults, override=True)
return self
def functional_defaults(self, **kwargs):
"""
Define default keyword arguments for functional patterns.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(kwargs, self._functional_defaults, override=True)
return self
def chain_defaults(self, **kwargs):
"""
Define default keyword arguments for patterns chain.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(kwargs, self._chain_defaults, override=True)
return self
def build_re(self, *pattern, **kwargs):
"""
Builds a new regular expression pattern
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(self._regex_defaults, kwargs)
set_defaults(self._defaults, kwargs)
return RePattern(*pattern, **kwargs)
def build_string(self, *pattern, **kwargs):
"""
Builds a new string pattern
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(self._string_defaults, kwargs)
set_defaults(self._defaults, kwargs)
return StringPattern(*pattern, **kwargs)
def build_functional(self, *pattern, **kwargs):
"""
Builds a new functional pattern
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(self._functional_defaults, kwargs)
set_defaults(self._defaults, kwargs)
return FunctionalPattern(*pattern, **kwargs)
def build_chain(self, **kwargs):
"""
Builds a new patterns chain
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
from .chain import Chain
set_defaults(self._chain_defaults, kwargs)
set_defaults(self._defaults, kwargs)
chain = Chain(self, **kwargs)
chain._defaults = deepcopy(self._defaults) # pylint: disable=protected-access
chain._regex_defaults = deepcopy(self._regex_defaults) # pylint: disable=protected-access
chain._functional_defaults = deepcopy(self._functional_defaults) # pylint: disable=protected-access
chain._string_defaults = deepcopy(self._string_defaults) # pylint: disable=protected-access
chain._chain_defaults = deepcopy(self._chain_defaults) # pylint: disable=protected-access
return chain
@abstractmethod
def pattern(self, *pattern):
"""
Register a list of Pattern instances
:param pattern:
:return:
"""
pass
def regex(self, *pattern, **kwargs):
"""
Add re pattern
:param pattern:
:type pattern:
:return: self
:rtype: Rebulk
"""
return self.pattern(self.build_re(*pattern, **kwargs))
def string(self, *pattern, **kwargs):
"""
Add string pattern
:param pattern:
:type pattern:
:return: self
:rtype: Rebulk
"""
return self.pattern(self.build_string(*pattern, **kwargs))
def functional(self, *pattern, **kwargs):
"""
Add functional pattern
:param pattern:
:type pattern:
:return: self
:rtype: Rebulk
"""
functional = self.build_functional(*pattern, **kwargs)
return self.pattern(functional)
def chain(self, **kwargs):
"""
Add patterns chain, using configuration of this rebulk
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
chain = self.build_chain(**kwargs)
self.pattern(chain)
return chain
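Defaults set on a Builder flow into every pattern it subsequently builds, and in rebulk 2.x the top-level Rebulk class adopts this base. A usage sketch against the API defined above (the property name 'example' is arbitrary):

from rebulk import Rebulk
from rebulk.remodule import re

rebulk = Rebulk()
rebulk.regex_defaults(flags=re.IGNORECASE).defaults(name='example')
rebulk.regex(r'pilot')   # picks up flags and name through build_re
rebulk.string('finale')  # picks up name through build_string
matches = rebulk.matches('The Pilot and the finale')
print([(m.name, m.value) for m in matches])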

View file

@ -6,9 +6,10 @@ Chain patterns and handle repeating capture group
# pylint: disable=super-init-not-called # pylint: disable=super-init-not-called
import itertools import itertools
from .loose import call, set_defaults from .builder import Builder
from .loose import call
from .match import Match, Matches from .match import Match, Matches
from .pattern import Pattern, filter_match_kwargs from .pattern import Pattern, filter_match_kwargs, BasePattern
from .remodule import re from .remodule import re
@ -19,150 +20,46 @@ class _InvalidChainException(Exception):
pass pass
class Chain(Pattern): class Chain(Pattern, Builder):
""" """
Definition of a pattern chain to search for. Definition of a pattern chain to search for.
""" """
def __init__(self, rebulk, chain_breaker=None, **kwargs): def __init__(self, parent, chain_breaker=None, **kwargs):
call(super(Chain, self).__init__, **kwargs) Builder.__init__(self)
call(Pattern.__init__, self, **kwargs)
self._kwargs = kwargs self._kwargs = kwargs
self._match_kwargs = filter_match_kwargs(kwargs) self._match_kwargs = filter_match_kwargs(kwargs)
self._defaults = {}
self._regex_defaults = {}
self._string_defaults = {}
self._functional_defaults = {}
if callable(chain_breaker): if callable(chain_breaker):
self.chain_breaker = chain_breaker self.chain_breaker = chain_breaker
else: else:
self.chain_breaker = None self.chain_breaker = None
self.rebulk = rebulk self.parent = parent
self.parts = [] self.parts = []
def defaults(self, **kwargs): def pattern(self, *pattern):
""" """
Define default keyword arguments for all patterns
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
self._defaults = kwargs
return self
def regex_defaults(self, **kwargs):
"""
Define default keyword arguments for functional patterns.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
self._regex_defaults = kwargs
return self
def string_defaults(self, **kwargs):
"""
Define default keyword arguments for string patterns.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
self._string_defaults = kwargs
return self
def functional_defaults(self, **kwargs):
"""
Define default keyword arguments for functional patterns.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
self._functional_defaults = kwargs
return self
def chain(self):
"""
Add patterns chain, using configuration from this chain
:return:
:rtype:
"""
# pylint: disable=protected-access
chain = self.rebulk.chain(**self._kwargs)
chain._defaults = dict(self._defaults)
chain._regex_defaults = dict(self._regex_defaults)
chain._functional_defaults = dict(self._functional_defaults)
chain._string_defaults = dict(self._string_defaults)
return chain
def regex(self, *pattern, **kwargs):
"""
Add re pattern
:param pattern: :param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return: :return:
:rtype:
""" """
set_defaults(self._kwargs, kwargs) if not pattern:
set_defaults(self._regex_defaults, kwargs) raise ValueError("One pattern should be given to the chain")
set_defaults(self._defaults, kwargs) if len(pattern) > 1:
pattern = self.rebulk.build_re(*pattern, **kwargs) raise ValueError("Only one pattern can be given to the chain")
part = ChainPart(self, pattern) part = ChainPart(self, pattern[0])
self.parts.append(part)
return part
def functional(self, *pattern, **kwargs):
"""
Add functional pattern
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(self._kwargs, kwargs)
set_defaults(self._functional_defaults, kwargs)
set_defaults(self._defaults, kwargs)
pattern = self.rebulk.build_functional(*pattern, **kwargs)
part = ChainPart(self, pattern)
self.parts.append(part)
return part
def string(self, *pattern, **kwargs):
"""
Add string pattern
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(self._kwargs, kwargs)
set_defaults(self._functional_defaults, kwargs)
set_defaults(self._defaults, kwargs)
pattern = self.rebulk.build_string(*pattern, **kwargs)
part = ChainPart(self, pattern)
self.parts.append(part) self.parts.append(part)
return part return part
def close(self): def close(self):
""" """
Close chain builder to continue registering other pattern Deeply close the chain
:return: Rebulk instance
:return:
:rtype:
""" """
return self.rebulk parent = self.parent
while isinstance(parent, Chain):
parent = parent.parent
return parent
def _match(self, pattern, input_string, context=None): def _match(self, pattern, input_string, context=None):
# pylint: disable=too-many-locals,too-many-nested-blocks # pylint: disable=too-many-locals,too-many-nested-blocks
@ -173,42 +70,20 @@ class Chain(Pattern):
chain_found = False chain_found = False
current_chain_matches = [] current_chain_matches = []
valid_chain = True valid_chain = True
is_chain_start = True
for chain_part in self.parts: for chain_part in self.parts:
try: try:
chain_part_matches, raw_chain_part_matches = Chain._match_chain_part(is_chain_start, chain_part, chain_part_matches, raw_chain_part_matches = chain_part.matches(chain_input_string,
chain_input_string, context,
context) with_raw_matches=True)
Chain._fix_matches_offset(chain_part_matches, input_string, offset)
Chain._fix_matches_offset(raw_chain_part_matches, input_string, offset)
if raw_chain_part_matches:
grouped_matches_dict = dict()
for match_index, match in itertools.groupby(chain_part_matches,
lambda m: m.match_index):
grouped_matches_dict[match_index] = list(match)
grouped_raw_matches_dict = dict()
for match_index, raw_match in itertools.groupby(raw_chain_part_matches,
lambda m: m.match_index):
grouped_raw_matches_dict[match_index] = list(raw_match)
for match_index, grouped_raw_matches in grouped_raw_matches_dict.items():
chain_found = True
offset = grouped_raw_matches[-1].raw_end
chain_input_string = input_string[offset:]
if not chain_part.is_hidden:
grouped_matches = grouped_matches_dict.get(match_index, [])
if self._chain_breaker_eval(current_chain_matches + grouped_matches):
current_chain_matches.extend(grouped_matches)
chain_found, chain_input_string, offset = \
self._to_next_chain_part(chain_part, chain_part_matches, raw_chain_part_matches, chain_found,
input_string, chain_input_string, offset, current_chain_matches)
except _InvalidChainException: except _InvalidChainException:
valid_chain = False valid_chain = False
if current_chain_matches: if current_chain_matches:
offset = current_chain_matches[0].raw_end offset = current_chain_matches[0].raw_end
break break
is_chain_start = False
if not chain_found: if not chain_found:
break break
if current_chain_matches and valid_chain: if current_chain_matches and valid_chain:
@ -217,38 +92,66 @@ class Chain(Pattern):
return chain_matches return chain_matches
def _match_parent(self, match, yield_parent): def _to_next_chain_part(self, chain_part, chain_part_matches, raw_chain_part_matches, chain_found,
input_string, chain_input_string, offset, current_chain_matches):
Chain._fix_matches_offset(chain_part_matches, input_string, offset)
Chain._fix_matches_offset(raw_chain_part_matches, input_string, offset)
if raw_chain_part_matches:
grouped_matches_dict = self._group_by_match_index(chain_part_matches)
grouped_raw_matches_dict = self._group_by_match_index(raw_chain_part_matches)
for match_index, grouped_raw_matches in grouped_raw_matches_dict.items():
chain_found = True
offset = grouped_raw_matches[-1].raw_end
chain_input_string = input_string[offset:]
if not chain_part.is_hidden:
grouped_matches = grouped_matches_dict.get(match_index, [])
if self._chain_breaker_eval(current_chain_matches + grouped_matches):
current_chain_matches.extend(grouped_matches)
return chain_found, chain_input_string, offset
def _process_match(self, match, match_index, child=False):
""" """
Handle a parent match Handle a match
:param match: :param match:
:type match: :type match:
:param yield_parent: :param match_index:
:type yield_parent: :type match_index:
:param child:
:type child:
:return: :return:
:rtype: :rtype:
""" """
ret = super(Chain, self)._match_parent(match, yield_parent) # pylint: disable=too-many-locals
ret = super(Chain, self)._process_match(match, match_index, child=child)
if ret:
return True
if match.children:
last_pattern = match.children[-1].pattern
last_pattern_groups = self._group_by_match_index(
[child_ for child_ in match.children if child_.pattern == last_pattern]
)
if last_pattern_groups:
original_children = Matches(match.children) original_children = Matches(match.children)
original_end = match.end original_end = match.end
while not ret and match.children:
last_pattern = match.children[-1].pattern
last_pattern_children = [child for child in match.children if child.pattern == last_pattern]
last_pattern_groups_iter = itertools.groupby(last_pattern_children, lambda child: child.match_index)
last_pattern_groups = {}
for index, matches in last_pattern_groups_iter:
last_pattern_groups[index] = list(matches)
for index in reversed(list(last_pattern_groups)): for index in reversed(list(last_pattern_groups)):
last_matches = list(last_pattern_groups[index]) last_matches = last_pattern_groups[index]
for last_match in last_matches: for last_match in last_matches:
match.children.remove(last_match) match.children.remove(last_match)
match.end = match.children[-1].end if match.children else match.start match.end = match.children[-1].end if match.children else match.start
ret = super(Chain, self)._match_parent(match, yield_parent) ret = super(Chain, self)._process_match(match, match_index, child=child)
if ret: if ret:
return True return True
match.children = original_children match.children = original_children
match.end = original_end match.end = original_end
return ret
return False
def _build_chain_match(self, current_chain_matches, input_string): def _build_chain_match(self, current_chain_matches, input_string):
start = None start = None
@ -282,46 +185,11 @@ class Chain(Pattern):
Chain._fix_matches_offset(chain_part_match.children, input_string, offset) Chain._fix_matches_offset(chain_part_match.children, input_string, offset)
@staticmethod @staticmethod
def _match_chain_part(is_chain_start, chain_part, chain_input_string, context): def _group_by_match_index(matches):
chain_part_matches, raw_chain_part_matches = chain_part.pattern.matches(chain_input_string, context, grouped_matches_dict = dict()
with_raw_matches=True) for match_index, match in itertools.groupby(matches, lambda m: m.match_index):
chain_part_matches = Chain._truncate_chain_part_matches(is_chain_start, chain_part_matches, chain_part, grouped_matches_dict[match_index] = list(match)
chain_input_string) return grouped_matches_dict
raw_chain_part_matches = Chain._truncate_chain_part_matches(is_chain_start, raw_chain_part_matches, chain_part,
chain_input_string)
Chain._validate_chain_part_matches(raw_chain_part_matches, chain_part)
return chain_part_matches, raw_chain_part_matches
@staticmethod
def _truncate_chain_part_matches(is_chain_start, chain_part_matches, chain_part, chain_input_string):
if not chain_part_matches:
return chain_part_matches
if not is_chain_start:
separator = chain_input_string[0:chain_part_matches[0].initiator.raw_start]
if separator:
return []
j = 1
for i in range(0, len(chain_part_matches) - 1):
separator = chain_input_string[chain_part_matches[i].initiator.raw_end:
chain_part_matches[i + 1].initiator.raw_start]
if separator:
break
j += 1
truncated = chain_part_matches[:j]
if chain_part.repeater_end is not None:
truncated = [m for m in truncated if m.match_index < chain_part.repeater_end]
return truncated
@staticmethod
def _validate_chain_part_matches(chain_part_matches, chain_part):
max_match_index = -1
if chain_part_matches:
max_match_index = max([m.match_index for m in chain_part_matches])
if max_match_index + 1 < chain_part.repeater_start:
raise _InvalidChainException
@property @property
def match_options(self): def match_options(self):
@ -338,7 +206,7 @@ class Chain(Pattern):
return "<%s%s:%s>" % (self.__class__.__name__, defined, self.parts) return "<%s%s:%s>" % (self.__class__.__name__, defined, self.parts)
class ChainPart(object): class ChainPart(BasePattern):
""" """
Part of a pattern chain. Part of a pattern chain.
""" """
@ -350,6 +218,51 @@ class ChainPart(object):
self.repeater_end = 1 self.repeater_end = 1
self._hidden = False self._hidden = False
@property
def _is_chain_start(self):
return self._chain.parts[0] == self
def matches(self, input_string, context=None, with_raw_matches=False):
matches, raw_matches = self.pattern.matches(input_string, context=context, with_raw_matches=True)
matches = self._truncate_repeater(matches, input_string)
raw_matches = self._truncate_repeater(raw_matches, input_string)
self._validate_repeater(raw_matches)
if with_raw_matches:
return matches, raw_matches
return matches
def _truncate_repeater(self, matches, input_string):
if not matches:
return matches
if not self._is_chain_start:
separator = input_string[0:matches[0].initiator.raw_start]
if separator:
return []
j = 1
for i in range(0, len(matches) - 1):
separator = input_string[matches[i].initiator.raw_end:
matches[i + 1].initiator.raw_start]
if separator:
break
j += 1
truncated = matches[:j]
if self.repeater_end is not None:
truncated = [m for m in truncated if m.match_index < self.repeater_end]
return truncated
def _validate_repeater(self, matches):
max_match_index = -1
if matches:
max_match_index = max([m.match_index for m in matches])
if max_match_index + 1 < self.repeater_start:
raise _InvalidChainException
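
To illustrate the repeater bounds, a rough sketch against the public chain API (a hypothetical single-part chain; calling matches() directly on the chain bypasses Rebulk's default rules):

    from rebulk import Rebulk

    rebulk = Rebulk()
    chain = rebulk.chain(children=True)
    chain.regex(r'(?P<digit>\d)').repeater('{2,3}')

    # below repeater_start=2 the whole chain is discarded via _InvalidChainException
    assert not chain.matches("1")
    # beyond repeater_end=3 the extra repetitions are truncated away
    assert {m.value for m in chain.matches("1234") if m.name == 'digit'} == {'1', '2', '3'}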
def chain(self): def chain(self):
""" """
Add patterns chain, using configuration from this chain Add patterns chain, using configuration from this chain
@ -15,9 +15,19 @@ def formatters(*chained_formatters):
:return: :return:
:rtype: :rtype:
""" """
def formatters_chain(input_string): # pylint:disable=missing-docstring def formatters_chain(input_string): # pylint:disable=missing-docstring
for chained_formatter in chained_formatters: for chained_formatter in chained_formatters:
input_string = chained_formatter(input_string) input_string = chained_formatter(input_string)
return input_string return input_string
return formatters_chain return formatters_chain
def default_formatter(input_string):
"""
Default formatter
:param input_string:
:return:
"""
return input_string
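
As a quick illustration (a minimal sketch: formatters() composes the given functions left to right, and default_formatter is the identity used when no formatter is configured):

    from rebulk.formatters import formatters, default_formatter

    shout = formatters(str.strip, str.upper)
    assert shout("  hello ") == "HELLO"           # strip first, then upper
    assert default_formatter("hello") == "hello"  # identity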
@ -3,7 +3,7 @@
""" """
Introspect rebulk object to retrieve capabilities. Introspect rebulk object to retrieve capabilities.
""" """
from abc import ABCMeta, abstractproperty from abc import ABCMeta, abstractmethod
from collections import defaultdict from collections import defaultdict
import six import six
@ -16,7 +16,8 @@ class Description(object):
""" """
Abstract class for a description. Abstract class for a description.
""" """
@abstractproperty @property
@abstractmethod
def properties(self): # pragma: no cover def properties(self): # pragma: no cover
""" """
Properties of described object. Properties of described object.
@ -4,12 +4,12 @@
Various utilities functions Various utilities functions
""" """
import sys import sys
import inspect
from inspect import isclass
try: try:
from inspect import getfullargspec as getargspec from inspect import getfullargspec as getargspec
_fullargspec_supported = True _fullargspec_supported = True
except ImportError: except ImportError:
_fullargspec_supported = False _fullargspec_supported = False
@ -55,8 +55,8 @@ def call(function, *args, **kwargs):
:return: same value as default function call :return: same value as default function call
:rtype: object :rtype: object
""" """
func = constructor_args if inspect.isclass(function) else function_args func = constructor_args if isclass(function) else function_args
call_args, call_kwargs = func(function, *args, **kwargs) call_args, call_kwargs = func(function, *args, ignore_unused=True, **kwargs) # @see #20
return function(*call_args, **call_kwargs) return function(*call_args, **call_kwargs)
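
A small sketch of what ignore_unused=True buys here: keyword arguments that the target callable does not declare are filtered out instead of raising TypeError (greet is a hypothetical target):

    from rebulk.loose import call

    def greet(name):
        return "hello %s" % name

    # 'flags' is not a parameter of greet(), so call() silently drops it
    assert call(greet, name="world", flags=42) == "hello world"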
@ -145,6 +145,8 @@ if not _fullargspec_supported:
else: else:
call_args = args[:len(argspec.args) - (1 if constructor else 0)] call_args = args[:len(argspec.args) - (1 if constructor else 0)]
return call_args, call_kwarg return call_args, call_kwarg
argspec_args = argspec_args_legacy argspec_args = argspec_args_legacy
@ -215,9 +217,12 @@ def filter_index(collection, predicate=None, index=None):
return collection return collection
def set_defaults(defaults, kwargs): def set_defaults(defaults, kwargs, override=False):
""" """
Set defaults from defaults dict to kwargs dict Set defaults from defaults dict to kwargs dict
:param override:
:type override:
:param defaults: :param defaults:
:type defaults: :type defaults:
:param kwargs: :param kwargs:
@ -225,12 +230,13 @@ def set_defaults(defaults, kwargs):
:return: :return:
:rtype: :rtype:
""" """
if 'clear' in defaults.keys() and defaults.pop('clear'):
kwargs.clear()
for key, value in defaults.items(): for key, value in defaults.items():
if key not in kwargs and value is not None: if key in kwargs:
kwargs[key] = value if isinstance(value, list) and isinstance(kwargs[key], list):
elif isinstance(value, list) and isinstance(kwargs[key], list):
kwargs[key] = list(value) + kwargs[key] kwargs[key] = list(value) + kwargs[key]
elif isinstance(value, dict) and isinstance(kwargs[key], dict): elif isinstance(value, dict) and isinstance(kwargs[key], dict):
set_defaults(value, kwargs[key]) set_defaults(value, kwargs[key])
elif key in kwargs and value is None: if key not in kwargs or override:
kwargs[key] = None kwargs[key] = value
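
A minimal sketch of the resulting merge semantics: lists are concatenated with defaults first, nested dicts merge recursively, existing scalars win unless override=True, and a truthy 'clear' key in defaults empties kwargs before merging:

    from rebulk.loose import set_defaults

    defaults = {'tags': ['default'], 'formatter': {'value': str}, 'name': 'fallback'}
    kwargs = {'tags': ['custom'], 'name': 'explicit'}
    set_defaults(defaults, kwargs)

    assert kwargs['tags'] == ['default', 'custom']  # lists: defaults are prepended
    assert kwargs['name'] == 'explicit'             # scalars: existing value wins (override=False)
    assert kwargs['formatter'] == {'value': str}    # missing keys are copied from defaults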
@ -815,6 +815,24 @@ class Match(object):
return filter_index(ret, predicate, index) return filter_index(ret, predicate, index)
def tagged(self, *tags):
"""
Check if this match has at least one of the provided tags
:param tags:
:return: True if this match has at least one of the provided tags, False otherwise.
"""
return any(tag in self.tags for tag in tags)
def named(self, *names):
"""
Check if one of the children matches has one of the provided names
:param names:
:return: True if at least one child with one of the provided names is defined, False otherwise.
"""
return any(name in self.names for name in names)
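
A short sketch of both helpers, assuming Match accepts name/tags keyword arguments in its constructor and that Match.names reflects the names of its children, as the docstrings above suggest:

    from rebulk.match import Match

    parent = Match(0, 6, name='episodeDetails', tags=['SxxExx', 'weak'], input_string='S01E02')
    child = Match(1, 3, name='season', parent=parent, input_string='S01E02')
    parent.children.append(child)

    assert parent.tagged('SxxExx')      # at least one of the given tags is present
    assert not parent.tagged('strong')
    assert parent.named('season')       # at least one child carries one of the given names
    assert not parent.named('episode')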
def __len__(self): def __len__(self):
return self.end - self.start return self.end - self.start
@ -10,14 +10,39 @@ from abc import ABCMeta, abstractmethod, abstractproperty
import six import six
from . import debug from . import debug
from .formatters import default_formatter
from .loose import call, ensure_list, ensure_dict from .loose import call, ensure_list, ensure_dict
from .match import Match from .match import Match
from .remodule import re, REGEX_AVAILABLE from .remodule import re, REGEX_AVAILABLE
from .utils import find_all, is_iterable, get_first_defined from .utils import find_all, is_iterable, get_first_defined
from .validators import allways_true
@six.add_metaclass(ABCMeta) @six.add_metaclass(ABCMeta)
class Pattern(object): class BasePattern(object):
"""
Base class for Pattern-like objects
"""
@abstractmethod
def matches(self, input_string, context=None, with_raw_matches=False):
"""
Computes all matches for a given input
:param input_string: the string to parse
:type input_string: str
:param context: the context
:type context: dict
:param with_raw_matches: if True, also return raw matches
:type with_raw_matches: bool
:return: matches based on input_string for this pattern
:rtype: iterator[Match]
"""
pass
@six.add_metaclass(ABCMeta)
class Pattern(BasePattern):
""" """
Definition of a particular pattern to search for. Definition of a particular pattern to search for.
""" """
@ -25,7 +50,7 @@ class Pattern(object):
def __init__(self, name=None, tags=None, formatter=None, value=None, validator=None, children=False, every=False, def __init__(self, name=None, tags=None, formatter=None, value=None, validator=None, children=False, every=False,
private_parent=False, private_children=False, private=False, private_names=None, ignore_names=None, private_parent=False, private_children=False, private=False, private_names=None, ignore_names=None,
marker=False, format_all=False, validate_all=False, disabled=lambda context: False, log_level=None, marker=False, format_all=False, validate_all=False, disabled=lambda context: False, log_level=None,
properties=None, post_processor=None, **kwargs): properties=None, post_processor=None, pre_match_processor=None, post_match_processor=None, **kwargs):
""" """
:param name: Name of this pattern :param name: Name of this pattern
:type name: str :type name: str
@ -66,15 +91,19 @@ class Pattern(object):
:type disabled: bool|function :type disabled: bool|function
:param log_lvl: Log level associated to this pattern :param log_lvl: Log level associated to this pattern
:type log_lvl: int :type log_lvl: int
:param post_process: Post processing function :param post_processor: Post processing function
:type post_processor: func :type post_processor: func
:param pre_match_processor: Pre match processing function
:type pre_match_processor: func
:param post_match_processor: Post match processing function
:type post_match_processor: func
""" """
# pylint:disable=too-many-locals,unused-argument # pylint:disable=too-many-locals,unused-argument
self.name = name self.name = name
self.tags = ensure_list(tags) self.tags = ensure_list(tags)
self.formatters, self._default_formatter = ensure_dict(formatter, lambda x: x) self.formatters, self._default_formatter = ensure_dict(formatter, default_formatter)
self.values, self._default_value = ensure_dict(value, None) self.values, self._default_value = ensure_dict(value, None)
self.validators, self._default_validator = ensure_dict(validator, lambda match: True) self.validators, self._default_validator = ensure_dict(validator, allways_true)
self.every = every self.every = every
self.children = children self.children = children
self.private = private self.private = private
@ -96,6 +125,14 @@ class Pattern(object):
self.post_processor = None self.post_processor = None
else: else:
self.post_processor = post_processor self.post_processor = post_processor
if not callable(pre_match_processor):
self.pre_match_processor = None
else:
self.pre_match_processor = pre_match_processor
if not callable(post_match_processor):
self.post_match_processor = None
else:
self.post_match_processor = post_match_processor
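
A hedged sketch of the new hooks (tag_numeric is hypothetical; per _process_match_processor further down, a processor may return a replacement match, while returning None keeps the original match):

    from rebulk import Rebulk

    def tag_numeric(match):
        # hypothetical post processor: annotate digit-only matches in place
        if str(match.value).isdigit():
            match.tags.append('numeric')
        return None  # None keeps the (possibly mutated) match

    rebulk = Rebulk().regex(r'\w+', post_match_processor=tag_numeric)
    assert 'numeric' in rebulk.matches("abc 123")[1].tags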
@property @property
def log_level(self): def log_level(self):
@ -106,83 +143,6 @@ class Pattern(object):
""" """
return self._log_level if self._log_level is not None else debug.LOG_LEVEL return self._log_level if self._log_level is not None else debug.LOG_LEVEL
def _yield_children(self, match):
"""
Does this match have children
:param match:
:type match:
:return:
:rtype:
"""
return match.children and (self.children or self.every)
def _yield_parent(self):
"""
Does this pattern yield a parent match
:param match:
:type match:
:return:
:rtype:
"""
return not self.children or self.every
def _match_parent(self, match, yield_parent):
"""
Handle a parent match
:param match:
:type match:
:param yield_parent:
:type yield_parent:
:return:
:rtype:
"""
if not match or match.value == "":
return False
pattern_value = get_first_defined(self.values, [match.name, '__parent__', None],
self._default_value)
if pattern_value:
match.value = pattern_value
if yield_parent or self.format_all:
match.formatter = get_first_defined(self.formatters, [match.name, '__parent__', None],
self._default_formatter)
if yield_parent or self.validate_all:
validator = get_first_defined(self.validators, [match.name, '__parent__', None],
self._default_validator)
if validator and not validator(match):
return False
return True
def _match_child(self, child, yield_children):
"""
Handle a children match
:param child:
:type child:
:param yield_children:
:type yield_children:
:return:
:rtype:
"""
if not child or child.value == "":
return False
pattern_value = get_first_defined(self.values, [child.name, '__children__', None],
self._default_value)
if pattern_value:
child.value = pattern_value
if yield_children or self.format_all:
child.formatter = get_first_defined(self.formatters, [child.name, '__children__', None],
self._default_formatter)
if yield_children or self.validate_all:
validator = get_first_defined(self.validators, [child.name, '__children__', None],
self._default_validator)
if validator and not validator(child):
return False
return True
def matches(self, input_string, context=None, with_raw_matches=False): def matches(self, input_string, context=None, with_raw_matches=False):
""" """
Computes all matches for a given input Computes all matches for a given input
@ -200,41 +160,168 @@ class Pattern(object):
matches = [] matches = []
raw_matches = [] raw_matches = []
for pattern in self.patterns: for pattern in self.patterns:
yield_parent = self._yield_parent() match_index = 0
match_index = -1
for match in self._match(pattern, input_string, context): for match in self._match(pattern, input_string, context):
match_index += 1
match.match_index = match_index
raw_matches.append(match) raw_matches.append(match)
yield_children = self._yield_children(match) matches.extend(self._process_matches(match, match_index))
if not self._match_parent(match, yield_parent): match_index += 1
continue
validated = True matches = self._post_process_matches(matches)
for child in match.children:
if not self._match_child(child, yield_children):
validated = False
break
if validated:
if self.private_parent:
match.private = True
if self.private_children:
for child in match.children:
child.private = True
if yield_parent or self.private_parent:
matches.append(match)
if yield_children or self.private_children:
for child in match.children:
child.match_index = match_index
matches.append(child)
matches = self._matches_post_process(matches)
self._matches_privatize(matches)
self._matches_ignore(matches)
if with_raw_matches: if with_raw_matches:
return matches, raw_matches return matches, raw_matches
return matches return matches
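
For reference, a minimal sketch of the two return values (raw_matches keeps every underlying hit, matches only what survives processing):

    from rebulk.pattern import StringPattern

    pattern = StringPattern("es", validator=lambda match: match.start > 0)
    matches, raw_matches = pattern.matches("estates", with_raw_matches=True)

    assert len(raw_matches) == 2  # raw hits at index 0 and index 5
    assert len(matches) == 1      # the hit at index 0 failed the validator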
def _matches_post_process(self, matches): @property
def _should_include_children(self):
"""
Check if children matches from this pattern should be included in matches results.
:param match:
:type match:
:return:
:rtype:
"""
return self.children or self.every
@property
def _should_include_parent(self):
"""
Check if a match from this pattern should be included in matches results.
:param match:
:type match:
:return:
:rtype:
"""
return not self.children or self.every
@staticmethod
def _match_config_property_keys(match, child=False):
if match.name:
yield match.name
if child:
yield '__children__'
else:
yield '__parent__'
yield None
@staticmethod
def _process_match_index(match, match_index):
"""
Set the match index on the given match from this pattern's processing state.
:param match:
:return:
"""
match.match_index = match_index
def _process_match_private(self, match, child=False):
"""
Process match privacy from this pattern configuration.
:param match:
:param child:
:return:
"""
if match.name and match.name in self.private_names or \
not child and self.private_parent or \
child and self.private_children:
match.private = True
def _process_match_value(self, match, child=False):
"""
Process match value from this pattern configuration.
:param match:
:return:
"""
keys = self._match_config_property_keys(match, child=child)
pattern_value = get_first_defined(self.values, keys, self._default_value)
if pattern_value:
match.value = pattern_value
def _process_match_formatter(self, match, child=False):
"""
Process match formatter from this pattern configuration.
:param match:
:return:
"""
included = self._should_include_children if child else self._should_include_parent
if included or self.format_all:
keys = self._match_config_property_keys(match, child=child)
match.formatter = get_first_defined(self.formatters, keys, self._default_formatter)
def _process_match_validator(self, match, child=False):
"""
Process match validation from this pattern configuration.
:param match:
:return: True if match is validated by the configured validator, False otherwise.
"""
included = self._should_include_children if child else self._should_include_parent
if included or self.validate_all:
keys = self._match_config_property_keys(match, child=child)
validator = get_first_defined(self.validators, keys, self._default_validator)
if validator and not validator(match):
return False
return True
def _process_match(self, match, match_index, child=False):
"""
Process match from this pattern by setting all properties from defined configuration
(index, private, value, formatter, validator, ...).
:param match:
:type match:
:return: True if match is validated by the configured validator, False otherwise.
:rtype:
"""
self._process_match_index(match, match_index)
self._process_match_private(match, child)
self._process_match_value(match, child)
self._process_match_formatter(match, child)
return self._process_match_validator(match, child)
@staticmethod
def _process_match_processor(match, processor):
if processor:
ret = processor(match)
if ret is not None:
return ret
return match
def _process_matches(self, match, match_index):
"""
Process and generate all matches for the given unprocessed match.
:param match:
:param match_index:
:return: Processed and dispatched matches.
"""
match = self._process_match_processor(match, self.pre_match_processor)
if not match:
return
if not self._process_match(match, match_index):
return
for child in match.children:
if not self._process_match(child, match_index, child=True):
return
match = self._process_match_processor(match, self.post_match_processor)
if not match:
return
if (self._should_include_parent or self.private_parent) and match.name not in self.ignore_names:
yield match
if self._should_include_children or self.private_children:
children = [x for x in match.children if x.name not in self.ignore_names]
for child in children:
yield child
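
A small sketch of this dispatching (children=True yields only the named children, and ignore_names filters them before they are yielded):

    from rebulk.pattern import RePattern

    pattern = RePattern(r'(?P<first>\w+) (?P<second>\w+)', children=True, ignore_names=['second'])
    assert [m.name for m in pattern.matches("hello world")] == ['first']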
def _post_process_matches(self, matches):
""" """
Post process matches with user defined function Post process matches with user defined function
:param matches: :param matches:
@ -246,32 +333,6 @@ class Pattern(object):
return self.post_processor(matches, self) return self.post_processor(matches, self)
return matches return matches
def _matches_privatize(self, matches):
"""
Mark matches included in private_names with private flag.
:param matches:
:type matches:
:return:
:rtype:
"""
if self.private_names:
for match in matches:
if match.name in self.private_names:
match.private = True
def _matches_ignore(self, matches):
"""
Ignore matches included in ignore_names.
:param matches:
:type matches:
:return:
:rtype:
"""
if self.ignore_names:
for match in list(matches):
if match.name in self.ignore_names:
matches.remove(match)
@abstractproperty @abstractproperty
def patterns(self): # pragma: no cover def patterns(self): # pragma: no cover
""" """
@ -306,7 +367,7 @@ class Pattern(object):
@abstractmethod @abstractmethod
def _match(self, pattern, input_string, context=None): # pragma: no cover def _match(self, pattern, input_string, context=None): # pragma: no cover
""" """
Computes all matches for a given pattern and input Computes all unprocessed matches for a given pattern and input.
:param pattern: the pattern to use :param pattern: the pattern to use
:param input_string: the string to parse :param input_string: the string to parse
@ -350,7 +411,9 @@ class StringPattern(Pattern):
def _match(self, pattern, input_string, context=None): def _match(self, pattern, input_string, context=None):
for index in find_all(input_string, pattern, **self._kwargs): for index in find_all(input_string, pattern, **self._kwargs):
yield Match(index, index + len(pattern), pattern=self, input_string=input_string, **self._match_kwargs) match = Match(index, index + len(pattern), pattern=self, input_string=input_string, **self._match_kwargs)
if match:
yield match
class RePattern(Pattern): class RePattern(Pattern):
@ -411,14 +474,17 @@ class RePattern(Pattern):
for start, end in match_object.spans(i): for start, end in match_object.spans(i):
child_match = Match(start, end, name=name, parent=main_match, pattern=self, child_match = Match(start, end, name=name, parent=main_match, pattern=self,
input_string=input_string, **self._children_match_kwargs) input_string=input_string, **self._children_match_kwargs)
if child_match:
main_match.children.append(child_match) main_match.children.append(child_match)
else: else:
start, end = match_object.span(i) start, end = match_object.span(i)
if start > -1 and end > -1: if start > -1 and end > -1:
child_match = Match(start, end, name=name, parent=main_match, pattern=self, child_match = Match(start, end, name=name, parent=main_match, pattern=self,
input_string=input_string, **self._children_match_kwargs) input_string=input_string, **self._children_match_kwargs)
if child_match:
main_match.children.append(child_match) main_match.children.append(child_match)
if main_match:
yield main_match yield main_match
@ -457,14 +523,18 @@ class FunctionalPattern(Pattern):
if self._match_kwargs: if self._match_kwargs:
options = self._match_kwargs.copy() options = self._match_kwargs.copy()
options.update(args) options.update(args)
yield Match(pattern=self, input_string=input_string, **options) match = Match(pattern=self, input_string=input_string, **options)
if match:
yield match
else: else:
kwargs = self._match_kwargs kwargs = self._match_kwargs
if isinstance(args[-1], dict): if isinstance(args[-1], dict):
kwargs = dict(kwargs) kwargs = dict(kwargs)
kwargs.update(args[-1]) kwargs.update(args[-1])
args = args[:-1] args = args[:-1]
yield Match(*args, pattern=self, input_string=input_string, **kwargs) match = Match(*args, pattern=self, input_string=input_string, **kwargs)
if match:
yield match
def filter_match_kwargs(kwargs, children=False): def filter_match_kwargs(kwargs, children=False):
@ -5,20 +5,16 @@ Entry point functions and classes for Rebulk
""" """
from logging import getLogger from logging import getLogger
from .builder import Builder
from .match import Matches from .match import Matches
from .pattern import RePattern, StringPattern, FunctionalPattern
from .chain import Chain
from .processors import ConflictSolver, PrivateRemover from .processors import ConflictSolver, PrivateRemover
from .loose import set_defaults
from .utils import extend_safe
from .rules import Rules from .rules import Rules
from .utils import extend_safe
log = getLogger(__name__).log log = getLogger(__name__).log
class Rebulk(object): class Rebulk(Builder):
r""" r"""
Regular expression, string and function based patterns are declared in a ``Rebulk`` object. It uses a fluent API to Regular expression, string and function based patterns are declared in a ``Rebulk`` object. It uses a fluent API to
chain ``string``, ``regex``, and ``functional`` methods to define various pattern types. chain ``string``, ``regex``, and ``functional`` methods to define various pattern types.
@ -44,6 +40,7 @@ class Rebulk(object):
>>> bulk.matches("the lakers are from la") >>> bulk.matches("the lakers are from la")
[<lakers:(4, 10)>, <la:(20, 22)>] [<lakers:(4, 10)>, <la:(20, 22)>]
""" """
# pylint:disable=protected-access # pylint:disable=protected-access
def __init__(self, disabled=lambda context: False, default_rules=True): def __init__(self, disabled=lambda context: False, default_rules=True):
@ -56,6 +53,7 @@ class Rebulk(object):
:return: :return:
:rtype: :rtype:
""" """
super(Rebulk, self).__init__()
if not callable(disabled): if not callable(disabled):
self.disabled = lambda context: disabled self.disabled = lambda context: disabled
else: else:
@ -64,11 +62,6 @@ class Rebulk(object):
self._rules = Rules() self._rules = Rules()
if default_rules: if default_rules:
self.rules(ConflictSolver, PrivateRemover) self.rules(ConflictSolver, PrivateRemover)
self._defaults = {}
self._regex_defaults = {}
self._string_defaults = {}
self._functional_defaults = {}
self._chain_defaults = {}
self._rebulks = [] self._rebulks = []
def pattern(self, *pattern): def pattern(self, *pattern):
@ -83,172 +76,6 @@ class Rebulk(object):
self._patterns.extend(pattern) self._patterns.extend(pattern)
return self return self
def defaults(self, **kwargs):
"""
Define default keyword arguments for all patterns
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
self._defaults = kwargs
return self
def regex_defaults(self, **kwargs):
"""
Define default keyword arguments for regex patterns.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
self._regex_defaults = kwargs
return self
def regex(self, *pattern, **kwargs):
"""
Add re pattern
:param pattern:
:type pattern:
:return: self
:rtype: Rebulk
"""
self.pattern(self.build_re(*pattern, **kwargs))
return self
def build_re(self, *pattern, **kwargs):
"""
Builds a new regular expression pattern
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(self._regex_defaults, kwargs)
set_defaults(self._defaults, kwargs)
return RePattern(*pattern, **kwargs)
def string_defaults(self, **kwargs):
"""
Define default keyword arguments for string patterns.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
self._string_defaults = kwargs
return self
def string(self, *pattern, **kwargs):
"""
Add string pattern
:param pattern:
:type pattern:
:return: self
:rtype: Rebulk
"""
self.pattern(self.build_string(*pattern, **kwargs))
return self
def build_string(self, *pattern, **kwargs):
"""
Builds a new string pattern
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(self._string_defaults, kwargs)
set_defaults(self._defaults, kwargs)
return StringPattern(*pattern, **kwargs)
def functional_defaults(self, **kwargs):
"""
Define default keyword arguments for functional patterns.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
self._functional_defaults = kwargs
return self
def functional(self, *pattern, **kwargs):
"""
Add functional pattern
:param pattern:
:type pattern:
:return: self
:rtype: Rebulk
"""
self.pattern(self.build_functional(*pattern, **kwargs))
return self
def build_functional(self, *pattern, **kwargs):
"""
Builds a new functional pattern
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(self._functional_defaults, kwargs)
set_defaults(self._defaults, kwargs)
return FunctionalPattern(*pattern, **kwargs)
def chain_defaults(self, **kwargs):
"""
Define default keyword arguments for patterns chain.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
self._chain_defaults = kwargs
return self
def chain(self, **kwargs):
"""
Add patterns chain, using configuration of this rebulk
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
chain = self.build_chain(**kwargs)
self._patterns.append(chain)
return chain
def build_chain(self, **kwargs):
"""
Builds a new patterns chain
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(self._chain_defaults, kwargs)
set_defaults(self._defaults, kwargs)
return Chain(self, **kwargs)
def rules(self, *rules): def rules(self, *rules):
""" """
Add rules as a module, class or instance. Add rules as a module, class or instance.
@ -2,11 +2,11 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, no-member, len-as-condition # pylint: disable=no-self-use, pointless-statement, missing-docstring, no-member, len-as-condition
import re import re
from functools import partial from functools import partial
from rebulk.pattern import FunctionalPattern, StringPattern, RePattern
from ..rebulk import Rebulk
from ..validators import chars_surround from ..validators import chars_surround
from ..rebulk import Rebulk, FunctionalPattern, RePattern, StringPattern
def test_chain_close(): def test_chain_close():
@ -63,18 +63,61 @@ def test_build_chain():
def test_chain_defaults(): def test_chain_defaults():
rebulk = Rebulk() rebulk = Rebulk()
rebulk.defaults(validator=lambda x: True, ignore_names=['testIgnore'], children=True) rebulk.defaults(validator=lambda x: x.value.startswith('t'), ignore_names=['testIgnore'], children=True)
rebulk.chain()\ rebulk.chain() \
.regex("(?P<test>test)") \ .regex("(?P<test>test)") \
.regex(" ").repeater("*") \ .regex(" ").repeater("*") \
.regex("(?P<best>best)") \
.regex(" ").repeater("*") \
.regex("(?P<testIgnore>testIgnore)") .regex("(?P<testIgnore>testIgnore)")
matches = rebulk.matches("test testIgnore") matches = rebulk.matches("test best testIgnore")
assert len(matches) == 1 assert len(matches) == 1
assert matches[0].name == "test" assert matches[0].name == "test"
def test_chain_with_validators():
def chain_validator(match):
return match.value.startswith('t') and match.value.endswith('t')
def default_validator(match):
return match.value.startswith('t') and match.value.endswith('g')
def custom_validator(match):
return match.value.startswith('b') and match.value.endswith('t')
rebulk = Rebulk()
rebulk.defaults(children=True, validator=default_validator)
rebulk.chain(validate_all=True, validator={'__parent__': chain_validator}) \
.regex("(?P<test>testing)", validator=default_validator).repeater("+") \
.regex(" ").repeater("+") \
.regex("(?P<best>best)", validator=custom_validator).repeater("+")
matches = rebulk.matches("some testing best end")
assert len(matches) == 2
assert matches[0].name == "test"
assert matches[1].name == "best"
def test_matches_docs():
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE) \
.defaults(children=True, formatter={'episode': int, 'version': int}) \
.chain() \
.regex(r'e(?P<episode>\d{1,4})').repeater(1) \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'[ex-](?P<episode>\d{1,4})').repeater('*') \
.close() # .repeater(1) could be omitted as it's the default behavior
result = rebulk.matches("This is E14v2-15-16-17").to_dict() # converts matches to dict
assert 'episode' in result
assert result['episode'] == [14, 15, 16, 17]
assert 'version' in result
assert result['version'] == 2
def test_matches(): def test_matches():
rebulk = Rebulk() rebulk = Rebulk()
@ -144,8 +187,8 @@ def test_matches():
def test_matches_2(): def test_matches_2():
rebulk = Rebulk() \ rebulk = Rebulk() \
.regex_defaults(flags=re.IGNORECASE) \ .regex_defaults(flags=re.IGNORECASE) \
.chain(children=True, formatter={'episode': int}) \ .defaults(children=True, formatter={'episode': int, 'version': int}) \
.defaults(formatter={'version': int}) \ .chain() \
.regex(r'e(?P<episode>\d{1,4})') \ .regex(r'e(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \ .regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'[ex-](?P<episode>\d{1,4})').repeater('*') \ .regex(r'[ex-](?P<episode>\d{1,4})').repeater('*') \
@ -173,25 +216,32 @@ def test_matches_2():
def test_matches_3(): def test_matches_3():
alt_dash = (r'@', r'[\W_]') # abbreviation alt_dash = (r'@', r'[\W_]') # abbreviation
rebulk = Rebulk() match_names = ['season', 'episode']
other_names = ['screen_size', 'video_codec', 'audio_codec', 'audio_channels', 'container', 'date']
rebulk.chain(formatter={'season': int, 'episode': int}, rebulk = Rebulk()
rebulk.defaults(formatter={'season': int, 'episode': int},
tags=['SxxExx'], tags=['SxxExx'],
abbreviations=[alt_dash], abbreviations=[alt_dash],
private_names=['episodeSeparator', 'seasonSeparator'], private_names=['episodeSeparator', 'seasonSeparator'],
children=True, children=True,
private_parent=True, private_parent=True,
conflict_solver=lambda match, other: match conflict_solver=lambda match, other: match
if match.name in ['season', 'episode'] and other.name in if match.name in match_names and other.name in other_names
['screen_size', 'video_codec', 'audio_codec', else '__default__')
'audio_channels', 'container', 'date']
else '__default__') \ rebulk.chain() \
.defaults(children=True, private_parent=True) \
.regex(r'(?P<season>\d+)@?x@?(?P<episode>\d+)') \ .regex(r'(?P<season>\d+)@?x@?(?P<episode>\d+)') \
.regex(r'(?P<episodeSeparator>x|-|\+|&)(?P<episode>\d+)').repeater('*') \ .regex(r'(?P<episodeSeparator>x|-|\+|&)(?P<episode>\d+)').repeater('*') \
.close() \
.chain() \ .chain() \
.defaults(children=True, private_parent=True) \
.regex(r'S(?P<season>\d+)@?(?:xE|Ex|E|x)@?(?P<episode>\d+)') \ .regex(r'S(?P<season>\d+)@?(?:xE|Ex|E|x)@?(?P<episode>\d+)') \
.regex(r'(?:(?P<episodeSeparator>xE|Ex|E|x|-|\+|&)(?P<episode>\d+))').repeater('*') \ .regex(r'(?:(?P<episodeSeparator>xE|Ex|E|x|-|\+|&)(?P<episode>\d+))').repeater('*') \
.close() \
.chain() \ .chain() \
.defaults(children=True, private_parent=True) \
.regex(r'S(?P<season>\d+)') \ .regex(r'S(?P<season>\d+)') \
.regex(r'(?P<seasonSeparator>S|-|\+|&)(?P<season>\d+)').repeater('*') .regex(r'(?P<seasonSeparator>S|-|\+|&)(?P<season>\d+)').repeater('*')
@ -240,11 +290,11 @@ def test_matches_4():
rebulk = Rebulk() rebulk = Rebulk()
rebulk.regex_defaults(flags=re.IGNORECASE) rebulk.regex_defaults(flags=re.IGNORECASE)
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True, rebulk.defaults(validate_all=True, children=True)
validator={'__parent__': seps_surround}, children=True, private_parent=True) rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], private_parent=True)
rebulk.chain(formatter={'episode': int, 'version': int}) \ rebulk.chain(validator={'__parent__': seps_surround}, formatter={'episode': int, 'version': int}) \
.defaults(validator=None) \ .defaults(formatter={'episode': int, 'version': int}) \
.regex(r'e(?P<episode>\d{1,4})') \ .regex(r'e(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \ .regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*') .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')
@ -262,11 +312,11 @@ def test_matches_5():
rebulk = Rebulk() rebulk = Rebulk()
rebulk.regex_defaults(flags=re.IGNORECASE) rebulk.regex_defaults(flags=re.IGNORECASE)
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
validator={'__parent__': seps_surround}, children=True, private_parent=True)
rebulk.chain(formatter={'episode': int, 'version': int}) \ rebulk.chain(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
.defaults(validator=None) \ validator={'__parent__': seps_surround}, children=True, private_parent=True,
formatter={'episode': int, 'version': int}) \
.defaults(children=True, private_parent=True) \
.regex(r'e(?P<episode>\d{1,4})') \ .regex(r'e(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \ .regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('{2,3}') .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('{2,3}')
@ -288,7 +338,7 @@ def test_matches_6():
validator=None, children=True, private_parent=True) validator=None, children=True, private_parent=True)
rebulk.chain(formatter={'episode': int, 'version': int}) \ rebulk.chain(formatter={'episode': int, 'version': int}) \
.defaults(validator=None) \ .defaults(children=True, private_parent=True) \
.regex(r'e(?P<episode>\d{1,4})') \ .regex(r'e(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \ .regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('{2,3}') .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('{2,3}')
@ -2,19 +2,15 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, protected-access, invalid-name, len-as-condition # pylint: disable=no-self-use, pointless-statement, missing-docstring, protected-access, invalid-name, len-as-condition
from .default_rules_module import RuleRemove0
from .. import debug
from ..match import Match
from ..pattern import StringPattern from ..pattern import StringPattern
from ..rebulk import Rebulk from ..rebulk import Rebulk
from ..match import Match
from .. import debug
from .default_rules_module import RuleRemove0
class TestDebug(object): class TestDebug(object):
# request.addfinalizer(disable_debug)
#request.addfinalizer(disable_debug)
debug.DEBUG = True debug.DEBUG = True
pattern = StringPattern(1, 3, value="es") pattern = StringPattern(1, 3, value="es")
@ -38,43 +34,43 @@ class TestDebug(object):
debug.DEBUG = False debug.DEBUG = False
def test_pattern(self): def test_pattern(self):
assert self.pattern.defined_at.lineno == 20 assert self.pattern.defined_at.lineno > 0
assert self.pattern.defined_at.name == 'rebulk.test.test_debug' assert self.pattern.defined_at.name == 'rebulk.test.test_debug'
assert self.pattern.defined_at.filename.endswith('test_debug.py') assert self.pattern.defined_at.filename.endswith('test_debug.py')
assert str(self.pattern.defined_at) == 'test_debug.py#L20' assert str(self.pattern.defined_at).startswith('test_debug.py#L')
assert repr(self.pattern) == '<StringPattern@test_debug.py#L20:(1, 3)>' assert repr(self.pattern).startswith('<StringPattern@test_debug.py#L')
def test_match(self): def test_match(self):
assert self.match.defined_at.lineno == 22 assert self.match.defined_at.lineno > 0
assert self.match.defined_at.name == 'rebulk.test.test_debug' assert self.match.defined_at.name == 'rebulk.test.test_debug'
assert self.match.defined_at.filename.endswith('test_debug.py') assert self.match.defined_at.filename.endswith('test_debug.py')
assert str(self.match.defined_at) == 'test_debug.py#L22' assert str(self.match.defined_at).startswith('test_debug.py#L')
def test_rule(self): def test_rule(self):
assert self.rule.defined_at.lineno == 23 assert self.rule.defined_at.lineno > 0
assert self.rule.defined_at.name == 'rebulk.test.test_debug' assert self.rule.defined_at.name == 'rebulk.test.test_debug'
assert self.rule.defined_at.filename.endswith('test_debug.py') assert self.rule.defined_at.filename.endswith('test_debug.py')
assert str(self.rule.defined_at) == 'test_debug.py#L23' assert str(self.rule.defined_at).startswith('test_debug.py#L')
assert repr(self.rule) == '<RuleRemove0@test_debug.py#L23>' assert repr(self.rule).startswith('<RuleRemove0@test_debug.py#L')
def test_rebulk(self): def test_rebulk(self):
""" """
This test fails on travis CI, can't find out why there's 1 line offset ... This test fails on travis CI, can't find out why there's 1 line offset ...
""" """
assert self.rebulk._patterns[0].defined_at.lineno in [26, 27] assert self.rebulk._patterns[0].defined_at.lineno > 0
assert self.rebulk._patterns[0].defined_at.name == 'rebulk.test.test_debug' assert self.rebulk._patterns[0].defined_at.name == 'rebulk.test.test_debug'
assert self.rebulk._patterns[0].defined_at.filename.endswith('test_debug.py') assert self.rebulk._patterns[0].defined_at.filename.endswith('test_debug.py')
assert str(self.rebulk._patterns[0].defined_at) in ['test_debug.py#L26', 'test_debug.py#L27'] assert str(self.rebulk._patterns[0].defined_at).startswith('test_debug.py#L')
assert self.rebulk._patterns[1].defined_at.lineno in [27, 28] assert self.rebulk._patterns[1].defined_at.lineno > 0
assert self.rebulk._patterns[1].defined_at.name == 'rebulk.test.test_debug' assert self.rebulk._patterns[1].defined_at.name == 'rebulk.test.test_debug'
assert self.rebulk._patterns[1].defined_at.filename.endswith('test_debug.py') assert self.rebulk._patterns[1].defined_at.filename.endswith('test_debug.py')
assert str(self.rebulk._patterns[1].defined_at) in ['test_debug.py#L27', 'test_debug.py#L28'] assert str(self.rebulk._patterns[1].defined_at).startswith('test_debug.py#L')
assert self.matches[0].defined_at == self.rebulk._patterns[0].defined_at assert self.matches[0].defined_at == self.rebulk._patterns[0].defined_at
assert self.matches[1].defined_at == self.rebulk._patterns[1].defined_at assert self.matches[1].defined_at == self.rebulk._patterns[1].defined_at
@ -116,6 +116,9 @@ class TestMatchesClass(object):
assert "tag1" in matches.tags assert "tag1" in matches.tags
assert "tag2" in matches.tags assert "tag2" in matches.tags
assert self.match3.tagged("tag1")
assert not self.match3.tagged("start")
tag1 = matches.tagged("tag1") tag1 = matches.tagged("tag1")
assert len(tag1) == 2 assert len(tag1) == 2
assert tag1[0] == self.match2 assert tag1[0] == self.match2
@ -62,9 +62,20 @@ def validators(*chained_validators):
:return: :return:
:rtype: :rtype:
""" """
def validator_chain(match): # pylint:disable=missing-docstring def validator_chain(match): # pylint:disable=missing-docstring
for chained_validator in chained_validators: for chained_validator in chained_validators:
if not chained_validator(match): if not chained_validator(match):
return False return False
return True return True
return validator_chain return validator_chain
def allways_true(match): # pylint:disable=unused-argument
"""
A validator which is always true
:param match:
:return:
"""
return True
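
And a closing sketch composing it with validators() above (the chain short-circuits on the first failing validator; allways_true is the neutral default; longer_than_two is hypothetical):

    from rebulk.match import Match
    from rebulk.validators import validators, allways_true

    def longer_than_two(match):
        return len(match) > 2

    check = validators(allways_true, longer_than_two)
    assert check(Match(0, 3, input_string="abc"))
    assert not check(Match(0, 2, input_string="ab"))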