Update vendored guessit to 3.1.1

Updates python-dateutil to 2.8.2
Updates rebulk to 2.0.1
Labrys of Knossos 2022-11-28 19:44:46 -05:00
commit 2226a74ef8
66 changed files with 2995 additions and 1306 deletions

View file

@ -1,4 +1,5 @@
# coding: utf-8
# file generated by setuptools_scm
# don't change, don't track in version control
version = '2.7.5'
version = '2.8.2'
version_tuple = (2, 8, 2)

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
This module offers a generic easter computing method for any given year, using
This module offers a generic Easter computing method for any given year, using
Western, Orthodox or Julian algorithms.
"""
@ -21,15 +21,15 @@ def easter(year, method=EASTER_WESTERN):
quoted in "Explanatory Supplement to the Astronomical
Almanac", P. Kenneth Seidelmann, editor.
This algorithm implements three different easter
This algorithm implements three different Easter
calculation methods:
1 - Original calculation in Julian calendar, valid in
dates after 326 AD
2 - Original method, with date converted to Gregorian
calendar, valid in years 1583 to 4099
3 - Revised method, in Gregorian calendar, valid in
years 1583 to 4099 as well
1. Original calculation in Julian calendar, valid in
dates after 326 AD
2. Original method, with date converted to Gregorian
calendar, valid in years 1583 to 4099
3. Revised method, in Gregorian calendar, valid in
years 1583 to 4099 as well
These methods are represented by the constants:
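As an illustration of the API described above, a minimal sketch (the dates shown in comments are the well-known Easter dates for 2022):

from dateutil.easter import easter, EASTER_ORTHODOX

easter(2022)                   # datetime.date(2022, 4, 17), Western method (default)
easter(2022, EASTER_ORTHODOX)  # datetime.date(2022, 4, 24)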

View file

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
from ._parser import parse, parser, parserinfo
from ._parser import parse, parser, parserinfo, ParserError
from ._parser import DEFAULTPARSER, DEFAULTTZPARSER
from ._parser import UnknownTimezoneWarning
@ -9,6 +9,7 @@ from .isoparser import isoparser, isoparse
__all__ = ['parse', 'parser', 'parserinfo',
'isoparse', 'isoparser',
'ParserError',
'UnknownTimezoneWarning']

View file

@ -20,11 +20,11 @@ value falls back to the end of the month.
Additional resources about date/time string formats can be found below:
- `A summary of the international standard date and time notation
<http://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_
- `W3C Date and Time Formats <http://www.w3.org/TR/NOTE-datetime>`_
<https://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_
- `W3C Date and Time Formats <https://www.w3.org/TR/NOTE-datetime>`_
- `Time Formats (Planetary Rings Node) <https://pds-rings.seti.org:443/tools/time_formats.html>`_
- `CPAN ParseDate module
<http://search.cpan.org/~muir/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_
<https://metacpan.org/pod/release/MUIR/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_
- `Java SimpleDateFormat Class
<https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_
"""
@ -40,7 +40,7 @@ from calendar import monthrange
from io import StringIO
import six
from six import binary_type, integer_types, text_type
from six import integer_types, text_type
from decimal import Decimal
@ -49,7 +49,7 @@ from warnings import warn
from .. import relativedelta
from .. import tz
__all__ = ["parse", "parserinfo"]
__all__ = ["parse", "parserinfo", "ParserError"]
# TODO: pandas.core.tools.datetimes imports this explicitly. Might be worth
@ -60,14 +60,8 @@ class _timelex(object):
_split_decimal = re.compile("([.,])")
def __init__(self, instream):
if six.PY2:
# In Python 2, we can't duck type properly because unicode has
# a 'decode' function, and we'd be double-decoding
if isinstance(instream, (binary_type, bytearray)):
instream = instream.decode()
else:
if getattr(instream, 'decode', None) is not None:
instream = instream.decode()
if isinstance(instream, (bytes, bytearray)):
instream = instream.decode()
if isinstance(instream, text_type):
instream = StringIO(instream)
@ -291,7 +285,7 @@ class parserinfo(object):
("s", "second", "seconds")]
AMPM = [("am", "a"),
("pm", "p")]
UTCZONE = ["UTC", "GMT", "Z"]
UTCZONE = ["UTC", "GMT", "Z", "z"]
PERTAIN = ["of"]
TZOFFSET = {}
# TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate",
@ -388,7 +382,8 @@ class parserinfo(object):
if res.year is not None:
res.year = self.convertyear(res.year, res.century_specified)
if res.tzoffset == 0 and not res.tzname or res.tzname == 'Z':
if ((res.tzoffset == 0 and not res.tzname) or
(res.tzname == 'Z' or res.tzname == 'z')):
res.tzname = "UTC"
res.tzoffset = 0
elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
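A short sketch of what accepting a lowercase "z" designator enables (behavior inferred from the UTCZONE change above):

from dateutil import parser, tz

dt = parser.parse("2022-11-28T19:44:46z")  # lowercase zone designator now accepted
dt.tzinfo == tz.UTC                        # True: 'z' is treated as UTC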
@ -422,7 +417,7 @@ class _ymd(list):
elif not self.has_month:
return 1 <= value <= 31
elif not self.has_year:
# Be permissive, assume leapyear
# Be permissive, assume leap year
month = self[self.mstridx]
return 1 <= value <= monthrange(2000, month)[1]
else:
@ -538,7 +533,7 @@ class _ymd(list):
year, month, day = self
else:
# 01-Jan-01
# Give precendence to day-first, since
# Give precedence to day-first, since
# two-digit years is usually hand-written.
day, month, year = self
@ -625,7 +620,7 @@ class parser(object):
first element being a :class:`datetime.datetime` object, the second
a tuple containing the fuzzy tokens.
:raises ValueError:
:raises ParserError:
Raised for invalid or unknown string format, if the provided
:class:`tzinfo` is not in a valid format, or if an invalid date
would be created.
@ -645,12 +640,15 @@ class parser(object):
res, skipped_tokens = self._parse(timestr, **kwargs)
if res is None:
raise ValueError("Unknown string format:", timestr)
raise ParserError("Unknown string format: %s", timestr)
if len(res) == 0:
raise ValueError("String does not contain a date:", timestr)
raise ParserError("String does not contain a date: %s", timestr)
ret = self._build_naive(res, default)
try:
ret = self._build_naive(res, default)
except ValueError as e:
six.raise_from(ParserError(str(e) + ": %s", timestr), e)
if not ignoretz:
ret = self._build_tzaware(ret, res, tzinfos)
@ -1021,7 +1019,7 @@ class parser(object):
hms_idx = idx + 2
elif idx > 0 and info.hms(tokens[idx-1]) is not None:
# There is a "h", "m", or "s" preceeding this token. Since neither
# There is a "h", "m", or "s" preceding this token. Since neither
# of the previous cases was hit, there is no label following this
# token, so we use the previous label.
# e.g. the "04" in "12h04"
@ -1060,7 +1058,8 @@ class parser(object):
tzname is None and
tzoffset is None and
len(token) <= 5 and
all(x in string.ascii_uppercase for x in token))
(all(x in string.ascii_uppercase for x in token)
or token in self.info.UTCZONE))
def _ampm_valid(self, hour, ampm, fuzzy):
"""
@ -1100,7 +1099,7 @@ class parser(object):
def _parse_min_sec(self, value):
# TODO: Every usage of this function sets res.second to the return
# value. Are there any cases where second will be returned as None and
# we *dont* want to set res.second = None?
# we *don't* want to set res.second = None?
minute = int(value)
second = None
@ -1109,14 +1108,6 @@ class parser(object):
second = int(60 * sec_remainder)
return (minute, second)
def _parsems(self, value):
"""Parse a I[.F] seconds value into (seconds, microseconds)."""
if "." not in value:
return int(value), 0
else:
i, f = value.split(".")
return int(i), int(f.ljust(6, "0")[:6])
def _parse_hms(self, idx, tokens, info, hms_idx):
# TODO: Is this going to admit a lot of false-positives for when we
# just happen to have digits and "h", "m" or "s" characters in non-date
@ -1135,21 +1126,35 @@ class parser(object):
return (new_idx, hms)
def _recombine_skipped(self, tokens, skipped_idxs):
"""
>>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"]
>>> skipped_idxs = [0, 1, 2, 5]
>>> _recombine_skipped(tokens, skipped_idxs)
["foo bar", "baz"]
"""
skipped_tokens = []
for i, idx in enumerate(sorted(skipped_idxs)):
if i > 0 and idx - 1 == skipped_idxs[i - 1]:
skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx]
else:
skipped_tokens.append(tokens[idx])
# ------------------------------------------------------------------
# Handling for individual tokens. These are kept as methods instead
# of functions for the sake of customizability via subclassing.
return skipped_tokens
def _parsems(self, value):
"""Parse a I[.F] seconds value into (seconds, microseconds)."""
if "." not in value:
return int(value), 0
else:
i, f = value.split(".")
return int(i), int(f.ljust(6, "0")[:6])
def _to_decimal(self, val):
try:
decimal_value = Decimal(val)
# See GH 662, edge case, infinite value should not be converted
# via `_to_decimal`
if not decimal_value.is_finite():
raise ValueError("Converted decimal value is infinite or NaN")
except Exception as e:
msg = "Could not convert %s to decimal" % val
six.raise_from(ValueError(msg), e)
else:
return decimal_value
# ------------------------------------------------------------------
# Post-Parsing construction of datetime output. These are kept as
# methods instead of functions for the sake of customizability via
# subclassing.
def _build_tzinfo(self, tzinfos, tzname, tzoffset):
if callable(tzinfos):
@ -1164,6 +1169,9 @@ class parser(object):
tzinfo = tz.tzstr(tzdata)
elif isinstance(tzdata, integer_types):
tzinfo = tz.tzoffset(tzname, tzdata)
else:
raise TypeError("Offset must be tzinfo subclass, tz string, "
"or int offset.")
return tzinfo
def _build_tzaware(self, naive, res, tzinfos):
@ -1181,10 +1189,10 @@ class parser(object):
# This is mostly relevant for winter GMT zones parsed in the UK
if (aware.tzname() != res.tzname and
res.tzname in self.info.UTCZONE):
aware = aware.replace(tzinfo=tz.tzutc())
aware = aware.replace(tzinfo=tz.UTC)
elif res.tzoffset == 0:
aware = naive.replace(tzinfo=tz.tzutc())
aware = naive.replace(tzinfo=tz.UTC)
elif res.tzoffset:
aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset))
@ -1239,17 +1247,21 @@ class parser(object):
return dt
def _to_decimal(self, val):
try:
decimal_value = Decimal(val)
# See GH 662, edge case, infinite value should not be converted via `_to_decimal`
if not decimal_value.is_finite():
raise ValueError("Converted decimal value is infinite or NaN")
except Exception as e:
msg = "Could not convert %s to decimal" % val
six.raise_from(ValueError(msg), e)
else:
return decimal_value
def _recombine_skipped(self, tokens, skipped_idxs):
"""
>>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"]
>>> skipped_idxs = [0, 1, 2, 5]
>>> _recombine_skipped(tokens, skipped_idxs)
["foo bar", "baz"]
"""
skipped_tokens = []
for i, idx in enumerate(sorted(skipped_idxs)):
if i > 0 and idx - 1 == skipped_idxs[i - 1]:
skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx]
else:
skipped_tokens.append(tokens[idx])
return skipped_tokens
DEFAULTPARSER = parser()
@ -1341,10 +1353,10 @@ def parse(timestr, parserinfo=None, **kwargs):
first element being a :class:`datetime.datetime` object, the second
a tuple containing the fuzzy tokens.
:raises ValueError:
Raised for invalid or unknown string format, if the provided
:class:`tzinfo` is not in a valid format, or if an invalid date
would be created.
:raises ParserError:
Raised for invalid or unknown string formats, if the provided
:class:`tzinfo` is not in a valid format, or if an invalid date would
be created.
:raises OverflowError:
Raised if the parsed date exceeds the largest valid C integer on
@ -1573,6 +1585,29 @@ DEFAULTTZPARSER = _tzparser()
def _parsetz(tzstr):
return DEFAULTTZPARSER.parse(tzstr)
class ParserError(ValueError):
"""Exception subclass used for any failure to parse a datetime string.
This is a subclass of :py:exc:`ValueError`, and should be raised any time
earlier versions of ``dateutil`` would have raised ``ValueError``.
.. versionadded:: 2.8.1
"""
def __str__(self):
try:
return self.args[0] % self.args[1:]
except (TypeError, IndexError):
return super(ParserError, self).__str__()
def __repr__(self):
args = ", ".join("'%s'" % arg for arg in self.args)
return "%s(%s)" % (self.__class__.__name__, args)
class UnknownTimezoneWarning(RuntimeWarning):
"""Raised when the parser finds a timezone it cannot parse into a tzinfo"""
"""Raised when the parser finds a timezone it cannot parse into a tzinfo.
.. versionadded:: 2.7.0
"""
# vim:ts=4:sw=4:et

View file

@ -88,10 +88,12 @@ class isoparser(object):
- ``hh``
- ``hh:mm`` or ``hhmm``
- ``hh:mm:ss`` or ``hhmmss``
- ``hh:mm:ss.sss`` or ``hh:mm:ss.ssssss`` (3-6 sub-second digits)
- ``hh:mm:ss.ssssss`` (Up to 6 sub-second digits)
Midnight is a special case for `hh`, as the standard supports both
00:00 and 24:00 as a representation.
00:00 and 24:00 as a representation. The decimal separator can be
either a dot or a comma.
.. caution::
@ -137,6 +139,10 @@ class isoparser(object):
else:
raise ValueError('String contains unknown ISO components')
if len(components) > 3 and components[3] == 24:
components[3] = 0
return datetime(*components) + timedelta(days=1)
return datetime(*components)
@_takes_ascii
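Taken together, the 24:00 handling and the comma decimal separator allow inputs like these (results are expectations based on the code above):

from dateutil.parser import isoparse

isoparse("2022-11-28T24:00:00")     # datetime(2022, 11, 29, 0, 0): midnight rolls over
isoparse("2022-11-28T19:44:46,25")  # microsecond=250000: comma fraction accepted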
@ -153,7 +159,7 @@ class isoparser(object):
components, pos = self._parse_isodate(datestr)
if pos < len(datestr):
raise ValueError('String contains unknown ISO ' +
'components: {}'.format(datestr))
'components: {!r}'.format(datestr.decode('ascii')))
return date(*components)
@_takes_ascii
@ -167,7 +173,10 @@ class isoparser(object):
:return:
Returns a :class:`datetime.time` object
"""
return time(*self._parse_isotime(timestr))
components = self._parse_isotime(timestr)
if components[0] == 24:
components[0] = 0
return time(*components)
@_takes_ascii
def parse_tzstr(self, tzstr, zero_as_utc=True):
@ -190,10 +199,9 @@ class isoparser(object):
return self._parse_tzstr(tzstr, zero_as_utc=zero_as_utc)
# Constants
_MICROSECOND_END_REGEX = re.compile(b'[-+Z]+')
_DATE_SEP = b'-'
_TIME_SEP = b':'
_MICRO_SEP = b'.'
_FRACTION_REGEX = re.compile(b'[\\.,]([0-9]+)')
def _parse_isodate(self, dt_str):
try:
@ -325,39 +333,42 @@ class isoparser(object):
pos = 0
comp = -1
if len(timestr) < 2:
if len_str < 2:
raise ValueError('ISO time too short')
has_sep = len_str >= 3 and timestr[2:3] == self._TIME_SEP
has_sep = False
while pos < len_str and comp < 5:
comp += 1
if timestr[pos:pos + 1] in b'-+Z':
if timestr[pos:pos + 1] in b'-+Zz':
# Detect time zone boundary
components[-1] = self._parse_tzstr(timestr[pos:])
pos = len_str
break
if comp == 1 and timestr[pos:pos+1] == self._TIME_SEP:
has_sep = True
pos += 1
elif comp == 2 and has_sep:
if timestr[pos:pos+1] != self._TIME_SEP:
raise ValueError('Inconsistent use of colon separator')
pos += 1
if comp < 3:
# Hour, minute, second
components[comp] = int(timestr[pos:pos + 2])
pos += 2
if (has_sep and pos < len_str and
timestr[pos:pos + 1] == self._TIME_SEP):
pos += 1
if comp == 3:
# Microsecond
if timestr[pos:pos + 1] != self._MICRO_SEP:
# Fraction of a second
frac = self._FRACTION_REGEX.match(timestr[pos:])
if not frac:
continue
pos += 1
us_str = self._MICROSECOND_END_REGEX.split(timestr[pos:pos + 6],
1)[0]
us_str = frac.group(1)[:6] # Truncate to microseconds
components[comp] = int(us_str) * 10**(6 - len(us_str))
pos += len(us_str)
pos += len(frac.group())
if pos < len_str:
raise ValueError('Unused components in ISO string')
@ -366,13 +377,12 @@ class isoparser(object):
# Standard supports 00:00 and 24:00 as representations of midnight
if any(component != 0 for component in components[1:4]):
raise ValueError('Hour may only be 24 at 24:00:00.000')
components[0] = 0
return components
def _parse_tzstr(self, tzstr, zero_as_utc=True):
if tzstr == b'Z':
return tz.tzutc()
if tzstr == b'Z' or tzstr == b'z':
return tz.UTC
if len(tzstr) not in {3, 5, 6}:
raise ValueError('Time zone offset must be 1, 3, 5 or 6 characters')
@ -391,7 +401,7 @@ class isoparser(object):
minutes = int(tzstr[(4 if tzstr[3:4] == self._TIME_SEP else 3):])
if zero_as_utc and hours == 0 and minutes == 0:
return tz.tzutc()
return tz.UTC
else:
if minutes > 59:
raise ValueError('Invalid minutes in time zone offset')

View file

@ -17,8 +17,12 @@ __all__ = ["relativedelta", "MO", "TU", "WE", "TH", "FR", "SA", "SU"]
class relativedelta(object):
"""
The relativedelta type is based on the specification of the excellent
work done by M.-A. Lemburg in his
The relativedelta type is designed to be applied to an existing datetime and
can replace specific components of that datetime, or represents an interval
of time.
It is based on the specification of the excellent work done by M.-A. Lemburg
in his
`mx.DateTime <https://www.egenix.com/products/python/mxBase/mxDateTime/>`_ extension.
However, notice that this type does *NOT* implement the same algorithm as
his work. Do *NOT* expect it to behave like mx.DateTime's counterpart.
@ -41,17 +45,19 @@ class relativedelta(object):
years, months, weeks, days, hours, minutes, seconds, microseconds:
Relative information, may be negative (argument is plural); adding
or subtracting a relativedelta with relative information performs
the corresponding aritmetic operation on the original datetime value
the corresponding arithmetic operation on the original datetime value
with the information in the relativedelta.
weekday:
One of the weekday instances (MO, TU, etc). These
instances may receive a parameter N, specifying the Nth
weekday, which could be positive or negative (like MO(+1)
or MO(-2). Not specifying it is the same as specifying
+1. You can also use an integer, where 0=MO. Notice that
if the calculated date is already Monday, for example,
using MO(1) or MO(-1) won't change the day.
One of the weekday instances (MO, TU, etc) available in the
relativedelta module. These instances may receive a parameter N,
specifying the Nth weekday, which could be positive or negative
(like MO(+1) or MO(-2)). Not specifying it is the same as specifying
+1. You can also use an integer, where 0=MO. This argument is always
relative e.g. if the calculated date is already Monday, using MO(1)
or MO(-1) won't change the day. To effectively make it absolute, use
it in combination with the day argument (e.g. day=1, MO(1) for first
Monday of the month).
leapdays:
Will add given days to the date found, if year is a leap
@ -82,9 +88,12 @@ class relativedelta(object):
For example
>>> from datetime import datetime
>>> from dateutil.relativedelta import relativedelta, MO
>>> dt = datetime(2018, 4, 9, 13, 37, 0)
>>> delta = relativedelta(hours=25, day=1, weekday=MO(1))
datetime(2018, 4, 2, 14, 37, 0)
>>> dt + delta
datetime.datetime(2018, 4, 2, 14, 37)
First, the day is set to 1 (the first of the month), then 25 hours
are added, to get to the 2nd day and 14th hour, finally the
@ -276,7 +285,7 @@ class relativedelta(object):
values for the relative attributes.
>>> relativedelta(days=1.5, hours=2).normalized()
relativedelta(days=1, hours=14)
relativedelta(days=+1, hours=+14)
:return:
Returns a :class:`dateutil.relativedelta.relativedelta` object.
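A small sketch of the "absolute weekday" idiom described above (the sample date is an assumption):

from datetime import datetime
from dateutil.relativedelta import relativedelta, MO

# First Monday of the month: day=1 anchors to the 1st, then MO(1) moves forward.
datetime(2022, 11, 15) + relativedelta(day=1, weekday=MO(1))
# -> datetime(2022, 11, 7, 0, 0)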

View file

@ -5,27 +5,27 @@ the recurrence rules documented in the
`iCalendar RFC <https://tools.ietf.org/html/rfc5545>`_,
including support for caching of results.
"""
import itertools
import datetime
import calendar
import datetime
import heapq
import itertools
import re
import sys
from functools import wraps
# For warning about deprecation of until and count
from warnings import warn
from six import advance_iterator, integer_types
from six.moves import _thread, range
from ._common import weekday as weekdaybase
try:
from math import gcd
except ImportError:
from fractions import gcd
from six import advance_iterator, integer_types
from six.moves import _thread, range
import heapq
from ._common import weekday as weekdaybase
from .tz import tzutc, tzlocal
# For warning about deprecation of until and count
from warnings import warn
__all__ = ["rrule", "rruleset", "rrulestr",
"YEARLY", "MONTHLY", "WEEKLY", "DAILY",
"HOURLY", "MINUTELY", "SECONDLY",
@ -82,6 +82,7 @@ def _invalidates_cache(f):
Decorator for rruleset methods which may invalidate the
cached length.
"""
@wraps(f)
def inner_func(self, *args, **kwargs):
rv = f(self, *args, **kwargs)
self._invalidate_cache()
@ -178,7 +179,7 @@ class rrulebase(object):
return False
return False
# __len__() introduces a large performance penality.
# __len__() introduces a large performance penalty.
def count(self):
""" Returns the number of recurrences in this set. It will have go
trough the whole recurrence, if this hasn't been done before. """
@ -353,20 +354,26 @@ class rrule(rrulebase):
from calendar.firstweekday(), and may be modified by
calendar.setfirstweekday().
:param count:
How many occurrences will be generated.
If given, this determines how many occurrences will be generated.
.. note::
As of version 2.5.0, the use of the ``until`` keyword together
with the ``count`` keyword is deprecated per RFC-5545 Sec. 3.3.10.
As of version 2.5.0, the use of the keyword ``until`` in conjunction
with ``count`` is deprecated, to make sure ``dateutil`` is fully
compliant with `RFC-5545 Sec. 3.3.10 <https://tools.ietf.org/
html/rfc5545#section-3.3.10>`_. Therefore, ``until`` and ``count``
**must not** occur in the same call to ``rrule``.
:param until:
If given, this must be a datetime instance, that will specify the
If given, this must be a datetime instance specifying the upper-bound
limit of the recurrence. The last recurrence in the rule is the greatest
datetime that is less than or equal to the value specified in the
``until`` parameter.
.. note::
As of version 2.5.0, the use of the ``until`` keyword together
with the ``count`` keyword is deprecated per RFC-5545 Sec. 3.3.10.
As of version 2.5.0, the use of the keyword ``until`` in conjunction
with ``count`` is deprecated, to make sure ``dateutil`` is fully
compliant with `RFC-5545 Sec. 3.3.10 <https://tools.ietf.org/
html/rfc5545#section-3.3.10>`_. Therefore, ``until`` and ``count``
**must not** occur in the same call to ``rrule``.
:param bysetpos:
If given, it must be either an integer, or a sequence of integers,
positive or negative. Each given integer will specify an occurrence
@ -429,7 +436,7 @@ class rrule(rrulebase):
if not dtstart:
if until and until.tzinfo:
dtstart = datetime.datetime.now(tz=until.tzinfo).replace(microsecond=0)
else:
else:
dtstart = datetime.datetime.now().replace(microsecond=0)
elif not isinstance(dtstart, datetime.datetime):
dtstart = datetime.datetime.fromordinal(dtstart.toordinal())
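Tying the count/until documentation above to usage, a minimal sketch (dates are assumptions):

import datetime
from dateutil.rrule import rrule, WEEKLY

# count and until must not be combined; pick one.
list(rrule(WEEKLY, dtstart=datetime.datetime(2022, 11, 28), count=3))
# -> three Mondays: Nov 28, Dec 5 and Dec 12, 2022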
@ -1406,7 +1413,52 @@ class rruleset(rrulebase):
self._len = total
class _rrulestr(object):
""" Parses a string representation of a recurrence rule or set of
recurrence rules.
:param s:
Required, a string defining one or more recurrence rules.
:param dtstart:
If given, used as the default recurrence start if not specified in the
rule string.
:param cache:
If set ``True`` caching of results will be enabled, improving
performance of multiple queries considerably.
:param unfold:
If set ``True`` indicates that a rule string is split over more
than one line and should be joined before processing.
:param forceset:
If set ``True`` forces a :class:`dateutil.rrule.rruleset` to
be returned.
:param compatible:
If set ``True`` forces ``unfold`` and ``forceset`` to be ``True``.
:param ignoretz:
If set ``True``, time zones in parsed strings are ignored and a naive
:class:`datetime.datetime` object is returned.
:param tzids:
If given, a callable or mapping used to retrieve a
:class:`datetime.tzinfo` from a string representation.
Defaults to :func:`dateutil.tz.gettz`.
:param tzinfos:
Additional time zone names / aliases which may be present in a string
representation. See :func:`dateutil.parser.parse` for more
information.
:return:
Returns a :class:`dateutil.rrule.rruleset` or
:class:`dateutil.rrule.rrule`
"""
_freq_map = {"YEARLY": YEARLY,
"MONTHLY": MONTHLY,
@ -1508,6 +1560,58 @@ class _rrulestr(object):
raise ValueError("invalid '%s': %s" % (name, value))
return rrule(dtstart=dtstart, cache=cache, **rrkwargs)
def _parse_date_value(self, date_value, parms, rule_tzids,
ignoretz, tzids, tzinfos):
global parser
if not parser:
from dateutil import parser
datevals = []
value_found = False
TZID = None
for parm in parms:
if parm.startswith("TZID="):
try:
tzkey = rule_tzids[parm.split('TZID=')[-1]]
except KeyError:
continue
if tzids is None:
from . import tz
tzlookup = tz.gettz
elif callable(tzids):
tzlookup = tzids
else:
tzlookup = getattr(tzids, 'get', None)
if tzlookup is None:
msg = ('tzids must be a callable, mapping, or None, '
'not %s' % tzids)
raise ValueError(msg)
TZID = tzlookup(tzkey)
continue
# RFC 5545 3.8.2.4: The VALUE parameter is optional, but may be found
# only once.
if parm not in {"VALUE=DATE-TIME", "VALUE=DATE"}:
raise ValueError("unsupported parm: " + parm)
else:
if value_found:
msg = ("Duplicate value parameter found in: " + parm)
raise ValueError(msg)
value_found = True
for datestr in date_value.split(','):
date = parser.parse(datestr, ignoretz=ignoretz, tzinfos=tzinfos)
if TZID is not None:
if date.tzinfo is None:
date = date.replace(tzinfo=TZID)
else:
raise ValueError('DTSTART/EXDATE specifies multiple timezone')
datevals.append(date)
return datevals
def _parse_rfc(self, s,
dtstart=None,
cache=False,
@ -1580,54 +1684,18 @@ class _rrulestr(object):
raise ValueError("unsupported EXRULE parm: "+parm)
exrulevals.append(value)
elif name == "EXDATE":
for parm in parms:
if parm != "VALUE=DATE-TIME":
raise ValueError("unsupported EXDATE parm: "+parm)
exdatevals.append(value)
exdatevals.extend(
self._parse_date_value(value, parms,
TZID_NAMES, ignoretz,
tzids, tzinfos)
)
elif name == "DTSTART":
# RFC 5445 3.8.2.4: The VALUE parameter is optional, but
# may be found only once.
value_found = False
TZID = None
valid_values = {"VALUE=DATE-TIME", "VALUE=DATE"}
for parm in parms:
if parm.startswith("TZID="):
try:
tzkey = TZID_NAMES[parm.split('TZID=')[-1]]
except KeyError:
continue
if tzids is None:
from . import tz
tzlookup = tz.gettz
elif callable(tzids):
tzlookup = tzids
else:
tzlookup = getattr(tzids, 'get', None)
if tzlookup is None:
msg = ('tzids must be a callable, ' +
'mapping, or None, ' +
'not %s' % tzids)
raise ValueError(msg)
TZID = tzlookup(tzkey)
continue
if parm not in valid_values:
raise ValueError("unsupported DTSTART parm: "+parm)
else:
if value_found:
msg = ("Duplicate value parameter found in " +
"DTSTART: " + parm)
raise ValueError(msg)
value_found = True
if not parser:
from dateutil import parser
dtstart = parser.parse(value, ignoretz=ignoretz,
tzinfos=tzinfos)
if TZID is not None:
if dtstart.tzinfo is None:
dtstart = dtstart.replace(tzinfo=TZID)
else:
raise ValueError('DTSTART specifies multiple timezones')
dtvals = self._parse_date_value(value, parms, TZID_NAMES,
ignoretz, tzids, tzinfos)
if len(dtvals) != 1:
raise ValueError("Multiple DTSTART values specified:" +
value)
dtstart = dtvals[0]
else:
raise ValueError("unsupported property: "+name)
if (forceset or len(rrulevals) > 1 or rdatevals
@ -1649,10 +1717,7 @@ class _rrulestr(object):
ignoretz=ignoretz,
tzinfos=tzinfos))
for value in exdatevals:
for datestr in value.split(','):
rset.exdate(parser.parse(datestr,
ignoretz=ignoretz,
tzinfos=tzinfos))
rset.exdate(value)
if compatible and dtstart:
rset.rdate(dtstart)
return rset

View file

@ -2,11 +2,6 @@
from .tz import *
from .tz import __doc__
#: Convenience constant providing a :class:`tzutc()` instance
#:
#: .. versionadded:: 2.7.0
UTC = tzutc()
__all__ = ["tzutc", "tzoffset", "tzlocal", "tzfile", "tzrange",
"tzstr", "tzical", "tzwin", "tzwinlocal", "gettz",
"enfold", "datetime_ambiguous", "datetime_exists",

View file

@ -1,4 +1,4 @@
from six import PY3
from six import PY2
from functools import wraps
@ -16,14 +16,18 @@ def tzname_in_python2(namefunc):
tzname() API changed in Python 3. It used to return bytes, but was changed
to unicode strings
"""
def adjust_encoding(*args, **kwargs):
name = namefunc(*args, **kwargs)
if name is not None and not PY3:
name = name.encode()
if PY2:
@wraps(namefunc)
def adjust_encoding(*args, **kwargs):
name = namefunc(*args, **kwargs)
if name is not None:
name = name.encode()
return name
return name
return adjust_encoding
return adjust_encoding
else:
return namefunc
# The following is adapted from Alexander Belopolsky's tz library
@ -208,7 +212,7 @@ class _tzinfo(tzinfo):
Since this is the one time that we *know* we have an unambiguous
datetime object, we take this opportunity to determine whether the
datetime is ambiguous and in a "fold" state (e.g. if it's the first
occurence, chronologically, of the ambiguous datetime).
occurrence, chronologically, of the ambiguous datetime).
:param dt:
A timezone-aware :class:`datetime.datetime` object.
@ -246,7 +250,7 @@ class _tzinfo(tzinfo):
Since this is the one time that we *know* we have an unambiguous
datetime object, we take this opportunity to determine whether the
datetime is ambiguous and in a "fold" state (e.g. if it's the first
occurance, chronologically, of the ambiguous datetime).
occurrence, chronologically, of the ambiguous datetime).
:param dt:
A timezone-aware :class:`datetime.datetime` object.

View file

@ -1,4 +1,8 @@
from datetime import timedelta
import weakref
from collections import OrderedDict
from six.moves import _thread
class _TzSingleton(type):
@ -11,6 +15,7 @@ class _TzSingleton(type):
cls.__instance = super(_TzSingleton, cls).__call__()
return cls.__instance
class _TzFactory(type):
def instance(cls, *args, **kwargs):
"""Alternate constructor that returns a fresh instance"""
@ -19,7 +24,11 @@ class _TzFactory(type):
class _TzOffsetFactory(_TzFactory):
def __init__(cls, *args, **kwargs):
cls.__instances = {}
cls.__instances = weakref.WeakValueDictionary()
cls.__strong_cache = OrderedDict()
cls.__strong_cache_size = 8
cls._cache_lock = _thread.allocate_lock()
def __call__(cls, name, offset):
if isinstance(offset, timedelta):
@ -31,12 +40,25 @@ class _TzOffsetFactory(_TzFactory):
if instance is None:
instance = cls.__instances.setdefault(key,
cls.instance(name, offset))
# This lock may not be necessary in Python 3. See GH issue #901
with cls._cache_lock:
cls.__strong_cache[key] = cls.__strong_cache.pop(key, instance)
# Remove an item if the strong cache is overpopulated
if len(cls.__strong_cache) > cls.__strong_cache_size:
cls.__strong_cache.popitem(last=False)
return instance
class _TzStrFactory(_TzFactory):
def __init__(cls, *args, **kwargs):
cls.__instances = {}
cls.__instances = weakref.WeakValueDictionary()
cls.__strong_cache = OrderedDict()
cls.__strong_cache_size = 8
cls.__cache_lock = _thread.allocate_lock()
def __call__(cls, s, posix_offset=False):
key = (s, posix_offset)
@ -45,5 +67,14 @@ class _TzStrFactory(_TzFactory):
if instance is None:
instance = cls.__instances.setdefault(key,
cls.instance(s, posix_offset))
# This lock may not be necessary in Python 3. See GH issue #901
with cls.__cache_lock:
cls.__strong_cache[key] = cls.__strong_cache.pop(key, instance)
# Remove an item if the strong cache is overpopulated
if len(cls.__strong_cache) > cls.__strong_cache_size:
cls.__strong_cache.popitem(last=False)
return instance
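The practical effect of the weak/strong cache split: equal constructor arguments still yield the same instance, while unreferenced instances can now be garbage collected (a sketch):

from dateutil import tz

tz.tzoffset("BRST", -10800) is tz.tzoffset("BRST", -10800)  # True: served from cache
tz.tzstr("EST5EDT") is tz.tzstr("EST5EDT")                  # True: same factory behavior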

View file

@ -13,6 +13,8 @@ import time
import sys
import os
import bisect
import weakref
from collections import OrderedDict
import six
from six import string_types
@ -28,6 +30,9 @@ try:
except ImportError:
tzwin = tzwinlocal = None
# For warning about rounding tzinfo
from warnings import warn
ZERO = datetime.timedelta(0)
EPOCH = datetime.datetime.utcfromtimestamp(0)
EPOCHORDINAL = EPOCH.toordinal()
@ -118,6 +123,12 @@ class tzutc(datetime.tzinfo):
__reduce__ = object.__reduce__
#: Convenience constant providing a :class:`tzutc()` instance
#:
#: .. versionadded:: 2.7.0
UTC = tzutc()
@six.add_metaclass(_TzOffsetFactory)
class tzoffset(datetime.tzinfo):
"""
@ -137,7 +148,8 @@ class tzoffset(datetime.tzinfo):
offset = offset.total_seconds()
except (TypeError, AttributeError):
pass
self._offset = datetime.timedelta(seconds=offset)
self._offset = datetime.timedelta(seconds=_get_supported_offset(offset))
def utcoffset(self, dt):
return self._offset
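Illustrating the new module-level UTC constant and the offset handling (the zone name "X" is a placeholder):

from datetime import timedelta
from dateutil import tz

tz.UTC is tz.tzutc()  # True: tzutc is a singleton
tz.tzoffset("X", timedelta(hours=2)).utcoffset(None)  # timedelta(seconds=7200)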
@ -373,7 +385,7 @@ class _tzfile(object):
class tzfile(_tzinfo):
"""
This is a ``tzinfo`` subclass thant allows one to use the ``tzfile(5)``
This is a ``tzinfo`` subclass that allows one to use the ``tzfile(5)``
format timezone files to extract current and historical zone information.
:param fileobj:
@ -460,7 +472,7 @@ class tzfile(_tzinfo):
if fileobj is not None:
if not file_opened_here:
fileobj = _ContextWrapper(fileobj)
fileobj = _nullcontext(fileobj)
with fileobj as file_stream:
tzobj = self._read_tzfile(file_stream)
@ -600,10 +612,7 @@ class tzfile(_tzinfo):
out.ttinfo_list = []
for i in range(typecnt):
gmtoff, isdst, abbrind = ttinfo[i]
# Round to full-minutes if that's not the case. Python's
# datetime doesn't accept sub-minute timezones. Check
# http://python.org/sf/1447945 for some information.
gmtoff = 60 * ((gmtoff + 30) // 60)
gmtoff = _get_supported_offset(gmtoff)
tti = _ttinfo()
tti.offset = gmtoff
tti.dstoffset = datetime.timedelta(0)
@ -655,37 +664,44 @@ class tzfile(_tzinfo):
# isgmt are off, so it should be in wall time. OTOH, it's
# always in gmt time. Let me know if you have comments
# about this.
laststdoffset = None
lastdst = None
lastoffset = None
lastdstoffset = None
lastbaseoffset = None
out.trans_list = []
for i, tti in enumerate(out.trans_idx):
if not tti.isdst:
offset = tti.offset
laststdoffset = offset
else:
if laststdoffset is not None:
# Store the DST offset as well and update it in the list
tti.dstoffset = tti.offset - laststdoffset
out.trans_idx[i] = tti
offset = tti.offset
dstoffset = 0
offset = laststdoffset or 0
if lastdst is not None:
if tti.isdst:
if not lastdst:
dstoffset = offset - lastoffset
out.trans_list.append(out.trans_list_utc[i] + offset)
if not dstoffset and lastdstoffset:
dstoffset = lastdstoffset
# In case we missed any DST offsets on the way in for some reason, make
# a second pass over the list, looking for the /next/ DST offset.
laststdoffset = None
for i in reversed(range(len(out.trans_idx))):
tti = out.trans_idx[i]
if tti.isdst:
if not (tti.dstoffset or laststdoffset is None):
tti.dstoffset = tti.offset - laststdoffset
else:
laststdoffset = tti.offset
tti.dstoffset = datetime.timedelta(seconds=dstoffset)
lastdstoffset = dstoffset
if not isinstance(tti.dstoffset, datetime.timedelta):
tti.dstoffset = datetime.timedelta(seconds=tti.dstoffset)
# If a time zone changes its base offset during a DST transition,
# then you need to adjust by the previous base offset to get the
# transition time in local time. Otherwise you use the current
# base offset. Ideally, I would have some mathematical proof of
# why this is true, but I haven't really thought about it enough.
baseoffset = offset - dstoffset
adjustment = baseoffset
if (lastbaseoffset is not None and baseoffset != lastbaseoffset
and tti.isdst != lastdst):
# The base DST has changed
adjustment = lastbaseoffset
out.trans_idx[i] = tti
lastdst = tti.isdst
lastoffset = offset
lastbaseoffset = baseoffset
out.trans_list.append(out.trans_list_utc[i] + adjustment)
out.trans_idx = tuple(out.trans_idx)
out.trans_list = tuple(out.trans_list)
@ -1255,7 +1271,7 @@ class tzical(object):
fileobj = open(fileobj, 'r')
else:
self._s = getattr(fileobj, 'name', repr(fileobj))
fileobj = _ContextWrapper(fileobj)
fileobj = _nullcontext(fileobj)
self._vtz = {}
@ -1528,7 +1544,9 @@ def __get_gettz():
"""
def __init__(self):
self.__instances = {}
self.__instances = weakref.WeakValueDictionary()
self.__strong_cache_size = 8
self.__strong_cache = OrderedDict()
self._cache_lock = _thread.allocate_lock()
def __call__(self, name=None):
@ -1537,17 +1555,37 @@ def __get_gettz():
if rv is None:
rv = self.nocache(name=name)
if not (name is None or isinstance(rv, tzlocal_classes)):
if not (name is None
or isinstance(rv, tzlocal_classes)
or rv is None):
# tzlocal is slightly more complicated than the other
# time zone providers because it depends on environment
# at construction time, so don't cache that.
#
# We also cannot store weak references to None, so we
# will also not store that.
self.__instances[name] = rv
else:
# No need for strong caching, return immediately
return rv
self.__strong_cache[name] = self.__strong_cache.pop(name, rv)
if len(self.__strong_cache) > self.__strong_cache_size:
self.__strong_cache.popitem(last=False)
return rv
def set_cache_size(self, size):
with self._cache_lock:
self.__strong_cache_size = size
while len(self.__strong_cache) > size:
self.__strong_cache.popitem(last=False)
def cache_clear(self):
with self._cache_lock:
self.__instances = {}
self.__instances = weakref.WeakValueDictionary()
self.__strong_cache.clear()
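The cache knobs exposed here can be driven like so (a sketch, assuming zoneinfo data is available on the system):

from dateutil import tz

nyc = tz.gettz("America/New_York")
nyc is tz.gettz("America/New_York")  # True while the instance is cached
tz.gettz.set_cache_size(16)          # grow the strong-reference LRU
tz.gettz.cache_clear()               # drop both weak and strong caches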
@staticmethod
def nocache(name=None):
@ -1558,7 +1596,7 @@ def __get_gettz():
name = os.environ["TZ"]
except KeyError:
pass
if name is None or name == ":":
if name is None or name in ("", ":"):
for filepath in TZFILES:
if not os.path.isabs(filepath):
filename = filepath
@ -1577,8 +1615,15 @@ def __get_gettz():
else:
tz = tzlocal()
else:
if name.startswith(":"):
name = name[1:]
try:
if name.startswith(":"):
name = name[1:]
except TypeError as e:
if isinstance(name, bytes):
new_msg = "gettz argument should be str, not bytes"
six.raise_from(TypeError(new_msg), e)
else:
raise
if os.path.isabs(name):
if os.path.isfile(name):
tz = tzfile(name)
@ -1601,7 +1646,8 @@ def __get_gettz():
if tzwin is not None:
try:
tz = tzwin(name)
except WindowsError:
except (WindowsError, UnicodeEncodeError):
# UnicodeEncodeError is for Python 2.7 compat
tz = None
if not tz:
@ -1622,7 +1668,7 @@ def __get_gettz():
break
else:
if name in ("GMT", "UTC"):
tz = tzutc()
tz = UTC
elif name in time.tzname:
tz = tzlocal()
return tz
@ -1662,7 +1708,7 @@ def datetime_exists(dt, tz=None):
# This is essentially a test of whether or not the datetime can survive
# a round trip to UTC.
dt_rt = dt.replace(tzinfo=tz).astimezone(tzutc()).astimezone(tz)
dt_rt = dt.replace(tzinfo=tz).astimezone(UTC).astimezone(tz)
dt_rt = dt_rt.replace(tzinfo=None)
return dt == dt_rt
@ -1768,18 +1814,36 @@ def _datetime_to_timestamp(dt):
return (dt.replace(tzinfo=None) - EPOCH).total_seconds()
class _ContextWrapper(object):
"""
Class for wrapping contexts so that they are passed through in a
with statement.
"""
def __init__(self, context):
self.context = context
if sys.version_info >= (3, 6):
def _get_supported_offset(second_offset):
return second_offset
else:
def _get_supported_offset(second_offset):
# For python pre-3.6, round to full-minutes if that's not the case.
# Python's datetime doesn't accept sub-minute timezones. Check
# http://python.org/sf/1447945 or https://bugs.python.org/issue5288
# for some information.
old_offset = second_offset
calculated_offset = 60 * ((second_offset + 30) // 60)
return calculated_offset
def __enter__(self):
return self.context
def __exit__(*args, **kwargs):
pass
try:
# Python 3.7 feature
from contextlib import nullcontext as _nullcontext
except ImportError:
class _nullcontext(object):
"""
Class for wrapping contexts so that they are passed through in a
with statement.
"""
def __init__(self, context):
self.context = context
def __enter__(self):
return self.context
def __exit__(*args, **kwargs):
pass
# vim:ts=4:sw=4:et

View file

@ -1,3 +1,11 @@
# -*- coding: utf-8 -*-
"""
This module provides an interface to the native time zone data on Windows,
including :py:class:`datetime.tzinfo` implementations.
Attempting to import this module on a non-Windows platform will raise an
:py:obj:`ImportError`.
"""
# This code was originally contributed by Jeffrey Harris.
import datetime
import struct
@ -39,7 +47,7 @@ TZKEYNAME = _settzkeyname()
class tzres(object):
"""
Class for accessing `tzres.dll`, which contains timezone name related
Class for accessing ``tzres.dll``, which contains timezone name related
resources.
.. versionadded:: 2.5.0
@ -72,9 +80,10 @@ class tzres(object):
:param offset:
A positive integer value referring to a string from the tzres dll.
..note:
.. note::
Offsets found in the registry are generally of the form
`@tzres.dll,-114`. The offset in this case if 114, not -114.
``@tzres.dll,-114``. The offset in this case is 114, not -114.
"""
resource = self.p_wchar()
@ -146,6 +155,9 @@ class tzwinbase(tzrangebase):
return result
def display(self):
"""
Return the display name of the time zone.
"""
return self._display
def transitions(self, year):
@ -188,6 +200,17 @@ class tzwinbase(tzrangebase):
class tzwin(tzwinbase):
"""
Time zone object created from the zone info in the Windows registry
These are similar to :py:class:`dateutil.tz.tzrange` objects in that
the time zone data is provided in the format of a single offset rule
for either 0 or 2 time zone transitions per year.
:param name:
The name of a Windows time zone key, e.g. "Eastern Standard Time".
The full list of keys can be retrieved with :func:`tzwin.list`.
"""
def __init__(self, name):
self._name = name
@ -234,6 +257,22 @@ class tzwin(tzwinbase):
class tzwinlocal(tzwinbase):
"""
Class representing the local time zone information in the Windows registry
While :class:`dateutil.tz.tzlocal` makes system calls (via the :mod:`time`
module) to retrieve time zone information, ``tzwinlocal`` retrieves the
rules directly from the Windows registry and creates an object like
:class:`dateutil.tz.tzwin`.
Because Windows does not have an equivalent of :func:`time.tzset`, on
Windows, :class:`dateutil.tz.tzlocal` instances will always reflect the
time zone settings *at the time that the process was started*, meaning
changes to the machine's time zone settings during the run of a program
on Windows will **not** be reflected by :class:`dateutil.tz.tzlocal`.
Because ``tzwinlocal`` reads the registry directly, it is unaffected by
this issue.
"""
def __init__(self):
with winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE) as handle:
with winreg.OpenKey(handle, TZLOCALKEYNAME) as tzlocalkey:

View file

@ -28,7 +28,7 @@ def today(tzinfo=None):
def default_tzinfo(dt, tzinfo):
"""
Sets the the ``tzinfo`` parameter on naive datetimes only
Sets the ``tzinfo`` parameter on naive datetimes only
This is useful for example when you are provided a datetime that may have
either an implicit or explicit time zone, such as when parsing a time zone
@ -63,7 +63,7 @@ def default_tzinfo(dt, tzinfo):
def within_delta(dt1, dt2, delta):
"""
Useful for comparing two datetimes that may a negilible difference
Useful for comparing two datetimes that may have a negligible difference
to be considered equal.
"""
delta = abs(delta)
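A sketch of both helpers whose docstrings are fixed above (values are assumptions):

from datetime import datetime, timedelta
from dateutil import tz
from dateutil.utils import default_tzinfo, within_delta

default_tzinfo(datetime(2022, 11, 28), tz.UTC)  # naive input, so UTC is attached
within_delta(datetime(2022, 11, 28, 0, 0, 0),
             datetime(2022, 11, 28, 0, 0, 1),
             timedelta(seconds=1))              # True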

View file

@ -3,7 +3,7 @@ import os
import tempfile
import shutil
import json
from subprocess import check_call
from subprocess import check_call, check_output
from tarfile import TarFile
from dateutil.zoneinfo import METADATA_FN, ZONEFILENAME
@ -23,11 +23,9 @@ def rebuild(filename, tag=None, format="gz", zonegroups=[], metadata=None):
for name in zonegroups:
tf.extract(name, tmpdir)
filepaths = [os.path.join(tmpdir, n) for n in zonegroups]
try:
check_call(["zic", "-d", zonedir] + filepaths)
except OSError as e:
_print_on_nosuchfile(e)
raise
_run_zic(zonedir, filepaths)
# write metadata file
with open(os.path.join(zonedir, METADATA_FN), 'w') as f:
json.dump(metadata, f, indent=4, sort_keys=True)
@ -40,6 +38,30 @@ def rebuild(filename, tag=None, format="gz", zonegroups=[], metadata=None):
shutil.rmtree(tmpdir)
def _run_zic(zonedir, filepaths):
"""Calls the ``zic`` compiler in a compatible way to get a "fat" binary.
Recent versions of ``zic`` default to ``-b slim``, while older versions
don't even have the ``-b`` option (but default to "fat" binaries). The
current version of dateutil does not support Version 2+ TZif files, which
causes problems when used in conjunction with "slim" binaries, so this
function is used to ensure that we always get a "fat" binary.
"""
try:
help_text = check_output(["zic", "--help"])
except OSError as e:
_print_on_nosuchfile(e)
raise
if b"-b " in help_text:
bloat_args = ["-b", "fat"]
else:
bloat_args = []
check_call(["zic"] + bloat_args + ["-d", zonedir] + filepaths)
def _print_on_nosuchfile(e):
"""Print helpful troubleshooting message

View file

@ -142,7 +142,7 @@ def main(args=None): # pylint:disable=too-many-branches
if options.get('yaml'):
try:
import yaml # pylint:disable=unused-variable
import yaml # pylint:disable=unused-variable,unused-import
except ImportError: # pragma: no cover
del options['yaml']
print('PyYAML is not installed. \'--yaml\' option will be ignored ...', file=sys.stderr)

View file

@ -4,4 +4,4 @@
Version module
"""
# pragma: no cover
__version__ = '3.0.3'
__version__ = '3.1.1'

View file

@ -82,6 +82,19 @@ def properties(options=None):
return default_api.properties(options)
def suggested_expected(titles, options=None):
"""
Return a list of suggested titles to be used as `expected_title` based on the list of titles
:param titles: the filename or release name
:type titles: list|set|dict
:param options:
:type options: str|dict
:return:
:rtype: list of str
"""
return default_api.suggested_expected(titles, options)
class GuessItApi(object):
"""
An api class that can be configured with custom Rebulk configuration.
@ -228,5 +241,23 @@ class GuessItApi(object):
ordered = self.rebulk.customize_properties(ordered)
return ordered
def suggested_expected(self, titles, options=None):
"""
Return a list of suggested titles to be used as `expected_title` based on the list of titles
:param titles: the filename or release name
:type titles: list|set|dict
:param options:
:type options: str|dict
:return:
:rtype: list of str
"""
suggested = []
for title in titles:
guess = self.guessit(title, options)
if len(guess) != 2 or 'title' not in guess:
suggested.append(title)
return suggested
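A sketch of how this feeds back into parsing (the sample title and return value are assumptions):

from guessit import guessit
from guessit.api import suggested_expected

expected = suggested_expected(['OSS 117'])  # likely ['OSS 117']: not cleanly parsed
guessit('OSS.117.1080p.mkv', {'expected_title': expected})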
default_api = GuessItApi()

View file

@ -4,7 +4,7 @@
Backports
"""
# pragma: no-cover
# pylint: disabled
# pylint: skip-file
def cmp_to_key(mycmp):
"""functools.cmp_to_key backport"""

View file

@ -1,18 +1,19 @@
{
"expected_title": [
"OSS 117"
"OSS 117",
"This is Us"
],
"allowed_countries": [
"au",
"us",
"gb"
"gb",
"us"
],
"allowed_languages": [
"ca",
"cs",
"de",
"en",
"es",
"ca",
"cs",
"fr",
"he",
"hi",
@ -20,7 +21,9 @@
"it",
"ja",
"ko",
"mul",
"nl",
"no",
"pl",
"pt",
"ro",
@ -28,18 +31,50 @@
"sv",
"te",
"uk",
"mul",
"und"
],
"advanced_config": {
"common_words": [
"ca",
"cat",
"de",
"it"
"he",
"it",
"no",
"por",
"rum",
"se",
"st",
"sub"
],
"groups": {
"starting": "([{",
"ending": ")]}"
},
"audio_codec": {
"audio_channels": {
"1.0": [
"1ch",
"mono"
],
"2.0": [
"2ch",
"stereo",
"re:(2[\\W_]0(?:ch)?)(?=[^\\d]|$)"
],
"5.1": [
"5ch",
"6ch",
"re:(5[\\W_][01](?:ch)?)(?=[^\\d]|$)",
"re:(6[\\W_]0(?:ch)?)(?=[^\\d]|$)"
],
"7.1": [
"7ch",
"8ch",
"re:(7[\\W_][01](?:ch)?)(?=[^\\d]|$)"
]
}
},
"container": {
"subtitles": [
"srt",
@ -59,9 +94,10 @@
"avi",
"divx",
"flv",
"mk3d",
"iso",
"m4v",
"mk2",
"mk3d",
"mka",
"mkv",
"mov",
@ -77,12 +113,11 @@
"ram",
"rm",
"ts",
"vob",
"wav",
"webm",
"wma",
"wmv",
"iso",
"vob"
"wmv"
],
"torrent": [
"torrent"
@ -255,7 +290,6 @@
],
"subtitle_prefixes": [
"st",
"v",
"vost",
"subforced",
"fansub",
@ -297,12 +331,12 @@
},
"release_group": {
"forbidden_names": [
"rip",
"bonus",
"by",
"for",
"par",
"pour",
"bonus"
"rip"
],
"ignored_seps": "[]{}()"
},
@ -311,6 +345,7 @@
"23.976",
"24",
"25",
"29.970",
"30",
"48",
"50",
@ -329,6 +364,7 @@
"progressive": [
"360",
"480",
"540",
"576",
"900",
"1080",
@ -342,8 +378,8 @@
"website": {
"safe_tlds": [
"com",
"org",
"net"
"net",
"org"
],
"safe_subdomains": [
"www"
@ -351,12 +387,200 @@
"safe_prefixes": [
"co",
"com",
"org",
"net"
"net",
"org"
],
"prefixes": [
"from"
]
},
"streaming_service": {
"A&E": [
"AE",
"A&E"
],
"ABC": "AMBC",
"ABC Australia": "AUBC",
"Al Jazeera English": "AJAZ",
"AMC": "AMC",
"Amazon Prime": [
"AMZN",
"Amazon",
"re:Amazon-?Prime"
],
"Adult Swim": [
"AS",
"re:Adult-?Swim"
],
"America's Test Kitchen": "ATK",
"Animal Planet": "ANPL",
"AnimeLab": "ANLB",
"AOL": "AOL",
"ARD": "ARD",
"BBC iPlayer": [
"iP",
"re:BBC-?iPlayer"
],
"BravoTV": "BRAV",
"Canal+": "CNLP",
"Cartoon Network": "CN",
"CBC": "CBC",
"CBS": "CBS",
"CNBC": "CNBC",
"Comedy Central": [
"CC",
"re:Comedy-?Central"
],
"Channel 4": "4OD",
"CHRGD": "CHGD",
"Cinemax": "CMAX",
"Country Music Television": "CMT",
"Comedians in Cars Getting Coffee": "CCGC",
"Crunchy Roll": [
"CR",
"re:Crunchy-?Roll"
],
"Crackle": "CRKL",
"CSpan": "CSPN",
"CTV": "CTV",
"CuriosityStream": "CUR",
"CWSeed": "CWS",
"Daisuki": "DSKI",
"DC Universe": "DCU",
"Deadhouse Films": "DHF",
"DramaFever": [
"DF",
"DramaFever"
],
"Digiturk Diledigin Yerde": "DDY",
"Discovery": [
"DISC",
"Discovery"
],
"Disney": [
"DSNY",
"Disney"
],
"DIY Network": "DIY",
"Doc Club": "DOCC",
"DPlay": "DPLY",
"E!": "ETV",
"ePix": "EPIX",
"El Trece": "ETTV",
"ESPN": "ESPN",
"Esquire": "ESQ",
"Family": "FAM",
"Family Jr": "FJR",
"Food Network": "FOOD",
"Fox": "FOX",
"Freeform": "FREE",
"FYI Network": "FYI",
"Global": "GLBL",
"GloboSat Play": "GLOB",
"Hallmark": "HLMK",
"HBO Go": [
"HBO",
"re:HBO-?Go"
],
"HGTV": "HGTV",
"History": [
"HIST",
"History"
],
"Hulu": "HULU",
"Investigation Discovery": "ID",
"IFC": "IFC",
"iTunes": "iTunes",
"ITV": "ITV",
"Knowledge Network": "KNOW",
"Lifetime": "LIFE",
"Motor Trend OnDemand": "MTOD",
"MBC": [
"MBC",
"MBCVOD"
],
"MSNBC": "MNBC",
"MTV": "MTV",
"National Geographic": [
"NATG",
"re:National-?Geographic"
],
"NBA TV": [
"NBA",
"re:NBA-?TV"
],
"NBC": "NBC",
"Netflix": [
"NF",
"Netflix"
],
"NFL": "NFL",
"NFL Now": "NFLN",
"NHL GameCenter": "GC",
"Nickelodeon": [
"NICK",
"Nickelodeon"
],
"Norsk Rikskringkasting": "NRK",
"OnDemandKorea": [
"ODK",
"OnDemandKorea"
],
"PBS": "PBS",
"PBS Kids": "PBSK",
"Playstation Network": "PSN",
"Pluzz": "PLUZ",
"RTE One": "RTE",
"SBS (AU)": "SBS",
"SeeSo": [
"SESO",
"SeeSo"
],
"Shomi": "SHMI",
"Spike": "SPIK",
"Spike TV": [
"SPKE",
"re:Spike-?TV"
],
"Sportsnet": "SNET",
"Sprout": "SPRT",
"Stan": "STAN",
"Starz": "STZ",
"Sveriges Television": "SVT",
"SwearNet": "SWER",
"Syfy": "SYFY",
"TBS": "TBS",
"TFou": "TFOU",
"The CW": [
"CW",
"re:The-?CW"
],
"TLC": "TLC",
"TubiTV": "TUBI",
"TV3 Ireland": "TV3",
"TV4 Sweeden": "TV4",
"TVING": "TVING",
"TV Land": [
"TVL",
"re:TV-?Land"
],
"UFC": "UFC",
"UKTV": "UKTV",
"Univision": "UNIV",
"USA Network": "USAN",
"Velocity": "VLCT",
"VH1": "VH1",
"Viceland": "VICE",
"Viki": "VIKI",
"Vimeo": "VMEO",
"VRV": "VRV",
"W Network": "WNET",
"WatchMe": "WME",
"WWE Network": "WWEN",
"Xbox Video": "XBOX",
"Yahoo": "YHOO",
"YouTube Red": "RED",
"ZDF": "ZDF"
}
}
}
}

View file

@ -128,7 +128,7 @@ class ConfigurationException(Exception):
"""
Exception related to configuration file.
"""
pass
pass # pylint:disable=unnecessary-pass
def load_config(options):
@ -153,7 +153,7 @@ def load_config(options):
cwd = os.getcwd()
yaml_supported = False
try:
import yaml # pylint: disable=unused-variable
import yaml # pylint:disable=unused-variable,unused-import
yaml_supported = True
except ImportError:
pass
@ -252,7 +252,7 @@ def load_config_file(filepath):
try:
import yaml
with open(filepath) as config_file_data:
return yaml.load(config_file_data)
return yaml.load(config_file_data, yaml.SafeLoader)
except ImportError: # pragma: no cover
raise ConfigurationException('Configuration file extension is not supported. '
'PyYAML should be installed to support "%s" file' % (
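The SafeLoader switch matters because yaml.load without an explicit loader can construct arbitrary Python objects; a sketch of the safe pattern ('options.yml' is a placeholder path):

import yaml

with open('options.yml') as f:
    options = yaml.load(f, yaml.SafeLoader)  # plain data only, no object construction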

View file

@ -25,7 +25,7 @@ def _potential_before(i, input_string):
:return:
:rtype: bool
"""
return i - 2 >= 0 and input_string[i] in seps and input_string[i - 2] in seps and input_string[i - 1] not in seps
return i - 1 >= 0 and input_string[i] in seps and input_string[i - 2] in seps and input_string[i - 1] not in seps
def _potential_after(i, input_string):

View file

@ -28,7 +28,7 @@ def int_coercable(string):
return False
def compose(*validators):
def and_(*validators):
"""
Compose validators functions
:param validators:
@ -49,3 +49,26 @@ def compose(*validators):
return False
return True
return composed
def or_(*validators):
"""
Compose validators functions
:param validators:
:type validators:
:return:
:rtype:
"""
def composed(string):
"""
Composed validators function
:param string:
:type string:
:return:
:rtype:
"""
for validator in validators:
if validator(string):
return True
return False
return composed
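A sketch of composing validators with the renamed and_ and the new or_ (module path assumed from this file):

from guessit.rules.common.validators import or_

check = or_(str.isdigit, str.isupper)
check("123")  # True: the first validator passes
check("abc")  # False: none pass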

View file

@ -0,0 +1,20 @@
"""
Match processors
"""
from guessit.rules.common import seps
def strip(match, chars=seps):
"""
Strip given characters from match.
:param chars:
:param match:
:return:
"""
while match.input_string[match.start] in chars:
match.start += 1
while match.input_string[match.end - 1] in chars:
match.end -= 1
if not match:
return False

View file

@ -34,7 +34,9 @@ class EnlargeGroupMatches(CustomRule):
for match in matches.ending(group.end - 1):
ending.append(match)
return starting, ending
if starting or ending:
return starting, ending
return False
def then(self, matches, when_response, context):
starting, ending = when_response

View file

@ -3,9 +3,8 @@
"""
audio_codec, audio_profile and audio_channels property
"""
from rebulk.remodule import re
from rebulk import Rebulk, Rule, RemoveMatch
from rebulk.remodule import re
from ..common import dash
from ..common.pattern import is_disabled
@ -23,7 +22,9 @@ def audio_codec(config): # pylint:disable=unused-argument
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
rebulk = Rebulk()\
.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])\
.string_defaults(ignore_case=True)
def audio_codec_priority(match1, match2):
"""
@ -61,7 +62,9 @@ def audio_codec(config): # pylint:disable=unused-argument
rebulk.string('PCM', value='PCM')
rebulk.string('LPCM', value='LPCM')
rebulk.defaults(name='audio_profile', disabled=lambda context: is_disabled(context, 'audio_profile'))
rebulk.defaults(clear=True,
name='audio_profile',
disabled=lambda context: is_disabled(context, 'audio_profile'))
rebulk.string('MA', value='Master Audio', tags=['audio_profile.rule', 'DTS-HD'])
rebulk.string('HR', 'HRA', value='High Resolution Audio', tags=['audio_profile.rule', 'DTS-HD'])
rebulk.string('ES', value='Extended Surround', tags=['audio_profile.rule', 'DTS'])
@ -70,17 +73,19 @@ def audio_codec(config): # pylint:disable=unused-argument
rebulk.string('HQ', value='High Quality', tags=['audio_profile.rule', 'Dolby Digital'])
rebulk.string('EX', value='EX', tags=['audio_profile.rule', 'Dolby Digital'])
rebulk.defaults(name="audio_channels", disabled=lambda context: is_disabled(context, 'audio_channels'))
rebulk.regex(r'(7[\W_][01](?:ch)?)(?=[^\d]|$)', value='7.1', children=True)
rebulk.regex(r'(5[\W_][01](?:ch)?)(?=[^\d]|$)', value='5.1', children=True)
rebulk.regex(r'(2[\W_]0(?:ch)?)(?=[^\d]|$)', value='2.0', children=True)
rebulk.defaults(clear=True,
name="audio_channels",
disabled=lambda context: is_disabled(context, 'audio_channels'))
rebulk.regex('7[01]', value='7.1', validator=seps_after, tags='weak-audio_channels')
rebulk.regex('5[01]', value='5.1', validator=seps_after, tags='weak-audio_channels')
rebulk.string('20', value='2.0', validator=seps_after, tags='weak-audio_channels')
rebulk.string('7ch', '8ch', value='7.1')
rebulk.string('5ch', '6ch', value='5.1')
rebulk.string('2ch', 'stereo', value='2.0')
rebulk.string('1ch', 'mono', value='1.0')
for value, items in config.get('audio_channels').items():
for item in items:
if item.startswith('re:'):
rebulk.regex(item[3:], value=value, children=True)
else:
rebulk.string(item, value=value)
rebulk.rules(DtsHDRule, DtsRule, AacRule, DolbyDigitalRule, AudioValidatorRule, HqConflictRule,
AudioChannelsValidatorRule)

View file

@ -69,4 +69,6 @@ class BitRateTypeRule(Rule):
else:
to_rename.append(match)
return to_rename, to_remove
if to_rename or to_remove:
return to_rename, to_remove
return False

View file

@ -26,7 +26,8 @@ def bonus(config): # pylint:disable=unused-argument
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
rebulk.regex(r'x(\d+)', name='bonus', private_parent=True, children=True, formatter=int,
validator={'__parent__': lambda match: seps_surround},
validator={'__parent__': seps_surround},
validate_all=True,
conflict_solver=lambda match, conflicting: match
if conflicting.name in ('video_codec', 'episode') and 'weak-episode' not in conflicting.tags
else '__default__')

View file

@ -44,7 +44,8 @@ def container(config):
rebulk.regex(r'\.'+build_or_pattern(torrent)+'$', exts=torrent, tags=['extension', 'torrent'])
rebulk.regex(r'\.'+build_or_pattern(nzb)+'$', exts=nzb, tags=['extension', 'nzb'])
rebulk.defaults(name='container',
rebulk.defaults(clear=True,
name='container',
validator=seps_surround,
formatter=lambda s: s.lower(),
conflict_solver=lambda match, other: match

View file

@ -10,6 +10,7 @@ from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, RenameMatch, POST_PRO
from ..common import seps, title_seps
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import or_
from ..properties.title import TitleFromPosition, TitleBaseRule
from ..properties.type import TypeProcessor
@ -133,8 +134,7 @@ class EpisodeTitleFromPosition(TitleBaseRule):
def hole_filter(self, hole, matches):
episode = matches.previous(hole,
lambda previous: any(name in previous.names
for name in self.previous_names),
lambda previous: previous.named(*self.previous_names),
0)
crc32 = matches.named('crc32')
@ -179,8 +179,7 @@ class AlternativeTitleReplace(Rule):
predicate=lambda match: 'title' in match.tags, index=0)
if main_title:
episode = matches.previous(main_title,
lambda previous: any(name in previous.names
for name in self.previous_names),
lambda previous: previous.named(*self.previous_names),
0)
crc32 = matches.named('crc32')
@ -249,7 +248,7 @@ class Filepart3EpisodeTitle(Rule):
if season:
hole = matches.holes(subdirectory.start, subdirectory.end,
ignore=lambda match: 'weak-episode' in match.tags,
ignore=or_(lambda match: 'weak-episode' in match.tags, TitleBaseRule.is_ignored),
formatter=cleanup, seps=title_seps, predicate=lambda match: match.value,
index=0)
if hole:
@ -292,7 +291,8 @@ class Filepart2EpisodeTitle(Rule):
season = (matches.range(directory.start, directory.end, lambda match: match.name == 'season', 0) or
matches.range(filename.start, filename.end, lambda match: match.name == 'season', 0))
if season:
hole = matches.holes(directory.start, directory.end, ignore=lambda match: 'weak-episode' in match.tags,
hole = matches.holes(directory.start, directory.end,
ignore=or_(lambda match: 'weak-episode' in match.tags, TitleBaseRule.is_ignored),
formatter=cleanup, seps=title_seps,
predicate=lambda match: match.value, index=0)
if hole:
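
Note: compose is replaced by and_ across these files, and a new or_ combinator appears for hole ignore-predicates. Assuming the usual predicate-combinator semantics for guessit.rules.common.validators (this sketch is not the vendored implementation):

def and_(*predicates):
    """Accept a match only when every predicate accepts it."""
    return lambda match: all(p(match) for p in predicates)

def or_(*predicates):
    """Accept a match when any predicate accepts it."""
    return lambda match: any(p(match) for p in predicates)

is_long = lambda s: len(s) > 3
is_upper = lambda s: s.isupper()
print(and_(is_long, is_upper)('ABCD'))  # True
print(or_(is_long, is_upper)('ab'))     # False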

View file

@ -11,12 +11,13 @@ from rebulk.match import Match
from rebulk.remodule import re
from rebulk.utils import is_iterable
from guessit.rules import match_processors
from guessit.rules.common.numeral import parse_numeral, numeral
from .title import TitleFromPosition
from ..common import dash, alt_dash, seps, seps_no_fs
from ..common.formatters import strip
from ..common.numeral import numeral, parse_numeral
from ..common.pattern import is_disabled
from ..common.validators import compose, seps_surround, seps_before, int_coercable
from ..common.validators import seps_surround, int_coercable, and_
from ...reutils import build_or_pattern
@ -29,17 +30,12 @@ def episodes(config):
:return: Created Rebulk object
:rtype: Rebulk
"""
# pylint: disable=too-many-branches,too-many-statements,too-many-locals
def is_season_episode_disabled(context):
"""Whether season and episode rules should be enabled."""
return is_disabled(context, 'episode') or is_disabled(context, 'season')
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'])
episode_max_range = config['episode_max_range']
season_max_range = config['season_max_range']
def episodes_season_chain_breaker(matches):
"""
Break chains if there's more than 100 offset between two neighbor values.
@ -57,8 +53,6 @@ def episodes(config):
return True
return False
rebulk.chain_defaults(chain_breaker=episodes_season_chain_breaker)
def season_episode_conflict_solver(match, other):
"""
Conflict solver for episode/season patterns
@ -76,7 +70,6 @@ def episodes(config):
if (other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags
and not match.initiator.children.named(match.name + 'Marker')) or (
other.name == 'screen_size' and not int_coercable(other.raw)):
return match
if other.name in ('season', 'episode') and match.initiator != other.initiator:
if (match.initiator.name in ('weak_episode', 'weak_duplicate')
@ -87,21 +80,6 @@ def episodes(config):
return current
return '__default__'
season_words = config['season_words']
episode_words = config['episode_words']
of_words = config['of_words']
all_words = config['all_words']
season_markers = config['season_markers']
season_ep_markers = config['season_ep_markers']
disc_markers = config['disc_markers']
episode_markers = config['episode_markers']
range_separators = config['range_separators']
weak_discrete_separators = list(sep for sep in seps_no_fs if sep not in range_separators)
strong_discrete_separators = config['discrete_separators']
discrete_separators = strong_discrete_separators + weak_discrete_separators
max_range_gap = config['max_range_gap']
def ordering_validator(match):
"""
Validator for season list. They should be in natural order to be validated.
@ -135,65 +113,18 @@ def episodes(config):
lambda m: m.name == property_name + 'Separator')
separator = match.children.previous(current_match,
lambda m: m.name == property_name + 'Separator', 0)
if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
if not 0 < current_match.value - previous_match.value <= max_range_gap + 1:
valid = False
if separator.raw in strong_discrete_separators:
valid = True
break
if separator:
if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
if not 0 < current_match.value - previous_match.value <= max_range_gap + 1:
valid = False
if separator.raw in strong_discrete_separators:
valid = True
break
previous_match = current_match
return valid
return is_consecutive('episode') and is_consecutive('season')
# S01E02, 01x02, S01S02S03
rebulk.chain(formatter={'season': int, 'episode': int},
tags=['SxxExx'],
abbreviations=[alt_dash],
children=True,
private_parent=True,
validate_all=True,
validator={'__parent__': ordering_validator},
conflict_solver=season_episode_conflict_solver,
disabled=is_season_episode_disabled) \
.regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)@?' +
build_or_pattern(episode_markers + disc_markers, name='episodeMarker') + r'@?(?P<episode>\d+)',
validate_all=True,
validator={'__parent__': seps_before}).repeater('+') \
.regex(build_or_pattern(episode_markers + disc_markers + discrete_separators + range_separators,
name='episodeSeparator',
escape=True) +
r'(?P<episode>\d+)').repeater('*') \
.chain() \
.regex(r'(?P<season>\d+)@?' +
build_or_pattern(season_ep_markers, name='episodeMarker') +
r'@?(?P<episode>\d+)',
validate_all=True,
validator={'__parent__': seps_before}) \
.chain() \
.regex(r'(?P<season>\d+)@?' +
build_or_pattern(season_ep_markers, name='episodeMarker') +
r'@?(?P<episode>\d+)',
validate_all=True,
validator={'__parent__': seps_before}) \
.regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
name='episodeSeparator',
escape=True) +
r'(?P<episode>\d+)').repeater('*') \
.chain() \
.regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)',
validate_all=True,
validator={'__parent__': seps_before}) \
.regex(build_or_pattern(season_markers + discrete_separators + range_separators,
name='seasonSeparator',
escape=True) +
r'(?P<season>\d+)').repeater('*')
# episode_details property
for episode_detail in ('Special', 'Pilot', 'Unaired', 'Final'):
rebulk.string(episode_detail, value=episode_detail, name='episode_details',
disabled=lambda context: is_disabled(context, 'episode_details'))
def validate_roman(match):
"""
Validate a roman match if surrounded by separators
@ -206,117 +137,203 @@ def episodes(config):
return True
return seps_surround(match)
season_words = config['season_words']
episode_words = config['episode_words']
of_words = config['of_words']
all_words = config['all_words']
season_markers = config['season_markers']
season_ep_markers = config['season_ep_markers']
disc_markers = config['disc_markers']
episode_markers = config['episode_markers']
range_separators = config['range_separators']
weak_discrete_separators = list(sep for sep in seps_no_fs if sep not in range_separators)
strong_discrete_separators = config['discrete_separators']
discrete_separators = strong_discrete_separators + weak_discrete_separators
episode_max_range = config['episode_max_range']
season_max_range = config['season_max_range']
max_range_gap = config['max_range_gap']
rebulk = Rebulk() \
.regex_defaults(flags=re.IGNORECASE) \
.string_defaults(ignore_case=True) \
.chain_defaults(chain_breaker=episodes_season_chain_breaker) \
.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
formatter={'season': int, 'episode': int, 'version': int, 'count': int},
children=True,
private_parent=True,
conflict_solver=season_episode_conflict_solver,
abbreviations=[alt_dash])
# S01E02, 01x02, S01S02S03
rebulk.chain(
tags=['SxxExx'],
validate_all=True,
validator={'__parent__': and_(seps_surround, ordering_validator)},
disabled=is_season_episode_disabled) \
.defaults(tags=['SxxExx']) \
.regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)@?' +
build_or_pattern(episode_markers + disc_markers, name='episodeMarker') + r'@?(?P<episode>\d+)')\
.repeater('+') \
.regex(build_or_pattern(episode_markers + disc_markers + discrete_separators + range_separators,
name='episodeSeparator',
escape=True) +
r'(?P<episode>\d+)').repeater('*')
rebulk.chain(tags=['SxxExx'],
validate_all=True,
validator={'__parent__': and_(seps_surround, ordering_validator)},
disabled=is_season_episode_disabled) \
.defaults(tags=['SxxExx']) \
.regex(r'(?P<season>\d+)@?' +
build_or_pattern(season_ep_markers, name='episodeMarker') +
r'@?(?P<episode>\d+)').repeater('+')
rebulk.chain(tags=['SxxExx'],
validate_all=True,
validator={'__parent__': and_(seps_surround, ordering_validator)},
disabled=is_season_episode_disabled) \
.defaults(tags=['SxxExx']) \
.regex(r'(?P<season>\d+)@?' +
build_or_pattern(season_ep_markers, name='episodeMarker') +
r'@?(?P<episode>\d+)') \
.regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
name='episodeSeparator',
escape=True) +
r'(?P<episode>\d+)').repeater('*')
rebulk.chain(tags=['SxxExx'],
validate_all=True,
validator={'__parent__': and_(seps_surround, ordering_validator)},
disabled=is_season_episode_disabled) \
.defaults(tags=['SxxExx']) \
.regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)') \
.regex('(?P<other>Extras)', name='other', value='Extras', tags=['no-release-group-prefix']).repeater('?') \
.regex(build_or_pattern(season_markers + discrete_separators + range_separators,
name='seasonSeparator',
escape=True) +
r'(?P<season>\d+)').repeater('*')
# episode_details property
for episode_detail in ('Special', 'Pilot', 'Unaired', 'Final'):
rebulk.string(episode_detail,
private_parent=False,
children=False,
value=episode_detail,
name='episode_details',
disabled=lambda context: is_disabled(context, 'episode_details'))
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True,
validate_all=True,
validator={'__parent__': and_(seps_surround, ordering_validator)},
children=True,
private_parent=True,
conflict_solver=season_episode_conflict_solver)
rebulk.chain(abbreviations=[alt_dash],
rebulk.chain(validate_all=True,
conflict_solver=season_episode_conflict_solver,
formatter={'season': parse_numeral, 'count': parse_numeral},
validator={'__parent__': compose(seps_surround, ordering_validator),
validator={'__parent__': and_(seps_surround, ordering_validator),
'season': validate_roman,
'count': validate_roman},
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'season')) \
.defaults(validator=None) \
.defaults(formatter={'season': parse_numeral, 'count': parse_numeral},
validator={'season': validate_roman, 'count': validate_roman},
conflict_solver=season_episode_conflict_solver) \
.regex(build_or_pattern(season_words, name='seasonMarker') + '@?(?P<season>' + numeral + ')') \
.regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
.regex(r'@?' + build_or_pattern(range_separators + discrete_separators + ['@'],
name='seasonSeparator', escape=True) +
r'@?(?P<season>\d+)').repeater('*')
rebulk.defaults(abbreviations=[dash])
rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>\d+)' +
r'(?:v(?P<version>\d+))?' +
r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?', # Episode 4
abbreviations=[dash], formatter={'episode': int, 'version': int, 'count': int},
disabled=lambda context: context.get('type') == 'episode' or is_disabled(context, 'episode'))
rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>' + numeral + ')' +
r'(?:v(?P<version>\d+))?' +
r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?', # Episode 4
abbreviations=[dash],
validator={'episode': validate_roman},
formatter={'episode': parse_numeral, 'version': int, 'count': int},
formatter={'episode': parse_numeral},
disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode'))
rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')',
tags=['SxxExx'],
abbreviations=[dash],
validator=None,
formatter={'season': int, 'other': lambda match: 'Complete'},
formatter={'other': lambda match: 'Complete'},
disabled=lambda context: is_disabled(context, 'season'))
# 12, 13
rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int},
rebulk.chain(tags=['weak-episode'],
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
.defaults(validator=None) \
.defaults(validator=None, tags=['weak-episode']) \
.regex(r'(?P<episode>\d{2})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*')
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})', abbreviations=None).repeater('*')
# 012, 013
rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int},
rebulk.chain(tags=['weak-episode'],
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
.defaults(validator=None) \
.defaults(validator=None, tags=['weak-episode']) \
.regex(r'0(?P<episode>\d{1,2})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})').repeater('*')
.regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})', abbreviations=None).repeater('*')
# 112, 113
rebulk.chain(tags=['weak-episode'],
formatter={'episode': int, 'version': int},
name='weak_episode',
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
.defaults(validator=None) \
.defaults(validator=None, tags=['weak-episode'], name='weak_episode') \
.regex(r'(?P<episode>\d{3,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})').repeater('*')
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})', abbreviations=None).repeater('*')
# 1, 2, 3
rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int},
rebulk.chain(tags=['weak-episode'],
disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode')) \
.defaults(validator=None) \
.defaults(validator=None, tags=['weak-episode']) \
.regex(r'(?P<episode>\d)') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})').repeater('*')
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})', abbreviations=None).repeater('*')
# e112, e113, 1e18, 3e19
# TODO: Enhance rebulk for validator to be used globally (season_episode_validator)
rebulk.chain(formatter={'season': int, 'episode': int, 'version': int},
disabled=lambda context: is_disabled(context, 'episode')) \
rebulk.chain(disabled=lambda context: is_disabled(context, 'episode')) \
.defaults(validator=None) \
.regex(r'(?P<season>\d{1,2})?(?P<episodeMarker>e)(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})', abbreviations=None).repeater('*')
# ep 112, ep113, ep112, ep113
rebulk.chain(abbreviations=[dash], formatter={'episode': int, 'version': int},
disabled=lambda context: is_disabled(context, 'episode')) \
rebulk.chain(disabled=lambda context: is_disabled(context, 'episode')) \
.defaults(validator=None) \
.regex(r'ep-?(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})').repeater('*')
.regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})', abbreviations=None).repeater('*')
# cap 112, cap 112_114
rebulk.chain(abbreviations=[dash],
tags=['see-pattern'],
formatter={'season': int, 'episode': int},
rebulk.chain(tags=['see-pattern'],
disabled=is_season_episode_disabled) \
.defaults(validator=None) \
.defaults(validator=None, tags=['see-pattern']) \
.regex(r'(?P<seasonMarker>cap)-?(?P<season>\d{1,2})(?P<episode>\d{2})') \
.regex(r'(?P<episodeSeparator>-)(?P<season>\d{1,2})(?P<episode>\d{2})').repeater('?')
# 102, 0102
rebulk.chain(tags=['weak-episode', 'weak-duplicate'],
formatter={'season': int, 'episode': int, 'version': int},
name='weak_duplicate',
conflict_solver=season_episode_conflict_solver,
disabled=lambda context: (context.get('episode_prefer_number', False) or
context.get('type') == 'movie') or is_season_episode_disabled(context)) \
.defaults(validator=None) \
.defaults(tags=['weak-episode', 'weak-duplicate'],
name='weak_duplicate',
validator=None,
conflict_solver=season_episode_conflict_solver) \
.regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})').repeater('*')
.regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})', abbreviations=None).repeater('*')
rebulk.regex(r'v(?P<version>\d+)', children=True, private_parent=True, formatter=int,
rebulk.regex(r'v(?P<version>\d+)',
formatter=int,
disabled=lambda context: is_disabled(context, 'version'))
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])
@ -325,18 +342,23 @@ def episodes(config):
# detached of X count (season/episode)
rebulk.regex(r'(?P<episode>\d+)-?' + build_or_pattern(of_words) +
r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
abbreviations=[dash], children=True, private_parent=True, formatter=int,
formatter=int,
pre_match_processor=match_processors.strip,
disabled=lambda context: is_disabled(context, 'episode'))
rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode",
rebulk.regex(r'Minisodes?',
children=False,
private_parent=False,
name='episode_format',
value="Minisode",
disabled=lambda context: is_disabled(context, 'episode_format'))
rebulk.rules(WeakConflictSolver, RemoveInvalidSeason, RemoveInvalidEpisode,
SeePatternRange(range_separators + ['_']),
EpisodeNumberSeparatorRange(range_separators),
SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx,
RemoveWeakDuplicate, EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator,
RemoveWeak, RenameToAbsoluteEpisode, CountValidator, EpisodeSingleDigitValidator, RenameToDiscMatch)
SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx, RemoveWeakDuplicate,
EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator, RemoveWeak(episode_words),
RenameToAbsoluteEpisode, CountValidator, EpisodeSingleDigitValidator, RenameToDiscMatch)
return rebulk
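
Note: the rewritten chains keep recognising the classic forms (S01E02, 01x02, S01S02S03, season/episode words, weak digit-only numbering). A smoke test against the public API, assuming the vendored package imports as guessit; the expected pairs follow from the patterns above:

from guessit import guessit

for name in ('Show.Name.S01E02.720p.mkv',
             'Show.Name.1x02.mkv',
             'Show Name - Season 2 Episode 3.avi'):
    info = guessit(name)
    print(name, '->', info.get('season'), info.get('episode'))
# expected: (1, 2), (1, 2), (2, 3)
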
@ -416,7 +438,9 @@ class WeakConflictSolver(Rule):
if to_append:
to_remove.extend(weak_dup_matches)
return to_remove, to_append
if to_remove or to_append:
return to_remove, to_append
return False
class CountValidator(Rule):
@ -442,7 +466,9 @@ class CountValidator(Rule):
season_count.append(count)
else:
to_remove.append(count)
return to_remove, episode_count, season_count
if to_remove or episode_count or season_count:
return to_remove, episode_count, season_count
return False
class SeePatternRange(Rule):
@ -477,7 +503,9 @@ class SeePatternRange(Rule):
to_remove.append(separator)
return to_remove, to_append
if to_remove or to_append:
return to_remove, to_append
return False
class AbstractSeparatorRange(Rule):
@ -533,7 +561,9 @@ class AbstractSeparatorRange(Rule):
previous_match = next_match
return to_remove, to_append
if to_remove or to_append:
return to_remove, to_append
return False
class RenameToAbsoluteEpisode(Rule):
@ -629,20 +659,41 @@ class RemoveWeak(Rule):
Remove weak-episode matches which appears after video, source, and audio matches.
"""
priority = 16
consequence = RemoveMatch
consequence = RemoveMatch, AppendMatch
def __init__(self, episode_words):
super(RemoveWeak, self).__init__()
self.episode_words = episode_words
def when(self, matches, context):
to_remove = []
to_append = []
for filepart in matches.markers.named('path'):
weaks = matches.range(filepart.start, filepart.end, predicate=lambda m: 'weak-episode' in m.tags)
if weaks:
previous = matches.previous(weaks[0], predicate=lambda m: m.name in (
weak = weaks[0]
previous = matches.previous(weak, predicate=lambda m: m.name in (
'audio_codec', 'screen_size', 'streaming_service', 'source', 'video_profile',
'audio_channels', 'audio_profile'), index=0)
if previous and not matches.holes(
previous.end, weaks[0].start, predicate=lambda m: m.raw.strip(seps)):
previous.end, weak.start, predicate=lambda m: m.raw.strip(seps)):
if previous.raw.lower() in self.episode_words:
try:
episode = copy.copy(weak)
episode.name = 'episode'
episode.value = int(weak.value)
episode.start = previous.start
episode.private = False
episode.tags = []
to_append.append(episode)
except ValueError:
pass
to_remove.extend(weaks)
return to_remove
if to_remove or to_append:
return to_remove, to_append
return False
class RemoveWeakIfSxxExx(Rule):
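
Note: the reworked RemoveWeak above no longer just discards weak-episode matches that trail a technical match. When that technical match's raw text is one of the configured episode words (hence the new RemoveWeak(episode_words) registration), the weak match is rescued by cloning it as a real episode whose span is widened over the episode word; copy.copy implies a module-level import copy outside this hunk. A self-contained sketch of the promotion mechanics using a stand-in Match:

import copy

class FakeMatch(object):
    """Minimal stand-in for rebulk's Match, for illustration only."""
    def __init__(self, name, value, start, end):
        self.name, self.value = name, value
        self.start, self.end = start, end
        self.tags, self.private = ['weak-episode'], True

weak = FakeMatch('weak_episode', '3', start=8, end=9)
episode_word_start = 0  # start of the adjacent 'Episode' word match (assumed)

episode = copy.copy(weak)
episode.name = 'episode'
episode.value = int(weak.value)     # non-numeric values raise ValueError and abort
episode.start = episode_word_start  # widen the span over the episode word
episode.private = False
episode.tags = []
print(episode.name, episode.value, (episode.start, episode.end))  # episode 3 (0, 9)
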
@ -856,4 +907,6 @@ class RenameToDiscMatch(Rule):
markers.append(marker)
discs.extend(sorted(marker.initiator.children.named('episode'), key=lambda m: m.value))
return discs, markers, to_remove
if discs or markers or to_remove:
return discs, markers, to_remove
return False

View file

@ -72,6 +72,8 @@ def language(config, common_words):
UNDETERMINED = babelfish.Language('und')
MULTIPLE = babelfish.Language('mul')
NON_SPECIFIC_LANGUAGES = frozenset([UNDETERMINED, MULTIPLE])
class GuessitConverter(babelfish.LanguageReverseConverter): # pylint: disable=missing-docstring
@ -388,7 +390,9 @@ class SubtitlePrefixLanguageRule(Rule):
to_remove.extend(matches.conflicting(lang))
if prefix in to_remove:
to_remove.remove(prefix)
return to_rename, to_remove
if to_rename or to_remove:
return to_rename, to_remove
return False
def then(self, matches, when_response, context):
to_rename, to_remove = when_response
@ -425,7 +429,9 @@ class SubtitleSuffixLanguageRule(Rule):
to_append.append(lang)
if suffix in to_remove:
to_remove.remove(suffix)
return to_append, to_remove
if to_append or to_remove:
return to_append, to_remove
return False
def then(self, matches, when_response, context):
to_rename, to_remove = when_response
@ -478,6 +484,7 @@ class RemoveInvalidLanguages(Rule):
"""Remove language matches that matches the blacklisted common words."""
consequence = RemoveMatch
priority = 32
def __init__(self, common_words):
"""Constructor."""

View file

@ -11,7 +11,7 @@ from rebulk.remodule import re
from ..common import dash
from ..common import seps
from ..common.pattern import is_disabled
from ..common.validators import seps_after, seps_before, seps_surround, compose
from ..common.validators import seps_after, seps_before, seps_surround, and_
from ...reutils import build_or_pattern
from ...rules.common.formatters import raw_cleanup
@ -35,11 +35,16 @@ def other(config): # pylint:disable=unused-argument,too-many-statements
rebulk.regex('ws', 'wide-?screen', value='Widescreen')
rebulk.regex('Re-?Enc(?:oded)?', value='Reencoded')
rebulk.string('Proper', 'Repack', 'Rerip', value='Proper',
rebulk.string('Repack', 'Rerip', value='Proper',
tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.string('Proper', value='Proper',
tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix'])
rebulk.regex('Real-Proper', 'Real-Repack', 'Real-Rerip', value='Proper',
tags=['streaming_service.prefix', 'streaming_service.suffix', 'real'])
rebulk.regex('Real', value='Proper',
tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix', 'real'])
rebulk.string('Fix', 'Fixed', value='Fix', tags=['has-neighbor-before', 'has-neighbor-after',
'streaming_service.prefix', 'streaming_service.suffix'])
rebulk.string('Dirfix', 'Nfofix', 'Prooffix', value='Fix',
@ -72,16 +77,18 @@ def other(config): # pylint:disable=unused-argument,too-many-statements
private_names=['completeArticle', 'completeWordsBefore', 'completeWordsAfter'],
value={'other': 'Complete'},
tags=['release-group-prefix'],
validator={'__parent__': compose(seps_surround, validate_complete)})
validator={'__parent__': and_(seps_surround, validate_complete)})
rebulk.string('R5', value='Region 5')
rebulk.string('RC', value='Region C')
rebulk.regex('Pre-?Air', value='Preair')
rebulk.regex('(?:PS-?)?Vita', value='PS Vita')
rebulk.regex('(?:PS-?)Vita', value='PS Vita')
rebulk.regex('Vita', value='PS Vita', tags='has-neighbor')
rebulk.regex('(HD)(?P<another>Rip)', value={'other': 'HD', 'another': 'Rip'},
private_parent=True, children=True, validator={'__parent__': seps_surround}, validate_all=True)
for value in ('Screener', 'Remux', '3D', 'PAL', 'SECAM', 'NTSC', 'XXX'):
for value in ('Screener', 'Remux', 'PAL', 'SECAM', 'NTSC', 'XXX'):
rebulk.string(value, value=value)
rebulk.string('3D', value='3D', tags='has-neighbor')
rebulk.string('HQ', value='High Quality', tags='uhdbluray-neighbor')
rebulk.string('HR', value='High Resolution')
@ -90,6 +97,7 @@ def other(config): # pylint:disable=unused-argument,too-many-statements
rebulk.string('mHD', 'HDLight', value='Micro HD')
rebulk.string('LDTV', value='Low Definition')
rebulk.string('HFR', value='High Frame Rate')
rebulk.string('VFR', value='Variable Frame Rate')
rebulk.string('HD', value='HD', validator=None,
tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.regex('Full-?HD', 'FHD', value='Full HD', validator=None,
@ -128,13 +136,15 @@ def other(config): # pylint:disable=unused-argument,too-many-statements
rebulk.regex('BT-?2020', value='BT.2020', tags='uhdbluray-neighbor')
rebulk.string('Sample', value='Sample', tags=['at-end', 'not-a-release-group'])
rebulk.string('Extras', value='Extras', tags='has-neighbor')
rebulk.regex('Digital-?Extras?', value='Extras')
rebulk.string('Proof', value='Proof', tags=['at-end', 'not-a-release-group'])
rebulk.string('Obfuscated', 'Scrambled', value='Obfuscated', tags=['at-end', 'not-a-release-group'])
rebulk.string('xpost', 'postbot', 'asrequested', value='Repost', tags='not-a-release-group')
rebulk.rules(RenameAnotherToOther, ValidateHasNeighbor, ValidateHasNeighborAfter, ValidateHasNeighborBefore,
ValidateScreenerRule, ValidateMuxRule, ValidateHardcodedSubs, ValidateStreamingServiceNeighbor,
ValidateAtEnd, ProperCountRule)
ValidateAtEnd, ValidateReal, ProperCountRule)
return rebulk
@ -354,3 +364,20 @@ class ValidateAtEnd(Rule):
to_remove.append(match)
return to_remove
class ValidateReal(Rule):
"""
Validate Real
"""
consequence = RemoveMatch
priority = 64
def when(self, matches, context):
ret = []
for filepart in matches.markers.named('path'):
for match in matches.range(filepart.start, filepart.end, lambda m: m.name == 'other' and 'real' in m.tags):
if not matches.range(filepart.start, match.start):
ret.append(match)
return ret
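
Note: ValidateReal drops an other match tagged 'real' when nothing else matched before it in its filepart, so a leading 'Real' stays available to the title instead of being consumed as the scene REAL flag. A quick behavioural check via the public API (release names are illustrative, outputs are the expected behaviour rather than verified output):

from guessit import guessit

# A leading 'Real' should remain title material rather than a REAL/Proper flag:
print(guessit('Real.Humans.S01E01.720p.HDTV.x264-GRP').get('title'))  # expected: Real Humans
# Preceded by other matches, REAL keeps its scene meaning:
print(guessit('Show.Name.S01E01.REAL.PROPER.720p.HDTV.x264-GRP').get('other'))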

View file

@ -8,7 +8,7 @@ from rebulk.remodule import re
from rebulk import Rebulk
from ..common import dash
from ..common.pattern import is_disabled
from ..common.validators import seps_surround, int_coercable, compose
from ..common.validators import seps_surround, int_coercable, and_
from ..common.numeral import numeral, parse_numeral
from ...reutils import build_or_pattern
@ -41,6 +41,6 @@ def part(config): # pylint:disable=unused-argument
rebulk.regex(build_or_pattern(prefixes) + r'-?(?P<part>' + numeral + r')',
prefixes=prefixes, validate_all=True, private_parent=True, children=True, formatter=parse_numeral,
validator={'part': compose(validate_roman, lambda m: 0 < m.value < 100)})
validator={'part': and_(validate_roman, lambda m: 0 < m.value < 100)})
return rebulk

View file

@ -9,8 +9,8 @@ from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch
from rebulk.match import Match
from ..common import seps
from ..common.expected import build_expected_function
from ..common.comparators import marker_sorted
from ..common.expected import build_expected_function
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import int_coercable, seps_surround
@ -50,7 +50,7 @@ def release_group(config):
if string.lower().endswith(forbidden) and string[-len(forbidden) - 1:-len(forbidden)] in seps:
string = string[:-len(forbidden)]
string = string.strip(groupname_seps)
return string
return string.strip()
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'release_group'))
@ -72,7 +72,9 @@ _scene_previous_names = ('video_codec', 'source', 'video_api', 'audio_codec', 'a
'audio_channels', 'screen_size', 'other', 'container', 'language', 'subtitle_language',
'subtitle_language.suffix', 'subtitle_language.prefix', 'language.suffix')
_scene_previous_tags = ('release-group-prefix', )
_scene_previous_tags = ('release-group-prefix',)
_scene_no_previous_tags = ('no-release-group-prefix',)
class DashSeparatedReleaseGroup(Rule):
@ -193,7 +195,8 @@ class DashSeparatedReleaseGroup(Rule):
if releasegroup.value:
to_append.append(releasegroup)
return to_remove, to_append
if to_remove or to_append:
return to_remove, to_append
class SceneReleaseGroup(Rule):
@ -212,6 +215,17 @@ class SceneReleaseGroup(Rule):
super(SceneReleaseGroup, self).__init__()
self.value_formatter = value_formatter
@staticmethod
def is_previous_match(match):
"""
Check if match can precede release_group
:param match:
:return:
"""
return not match.tagged(*_scene_no_previous_tags) if match.name in _scene_previous_names else \
match.tagged(*_scene_previous_tags)
def when(self, matches, context): # pylint:disable=too-many-locals
# If a release_group is found before, ignore this kind of release_group rule.
@ -253,13 +267,12 @@ class SceneReleaseGroup(Rule):
if match.start < filepart.start:
return False
return not match.private or match.name in _scene_previous_names
return not match.private or self.is_previous_match(match)
previous_match = matches.previous(last_hole,
previous_match_filter,
index=0)
if previous_match and (previous_match.name in _scene_previous_names or
any(tag in previous_match.tags for tag in _scene_previous_tags)) and \
if previous_match and (self.is_previous_match(previous_match)) and \
not matches.input_string[previous_match.end:last_hole.start].strip(seps) \
and not int_coercable(last_hole.value.strip(seps)):
@ -300,11 +313,11 @@ class AnimeReleaseGroup(Rule):
# If a release_group is found before, ignore this kind of release_group rule.
if matches.named('release_group'):
return to_remove, to_append
return False
if not matches.named('episode') and not matches.named('season') and matches.named('release_group'):
# This doesn't seems to be an anime, and we already found another release_group.
return to_remove, to_append
return False
for filepart in marker_sorted(matches.markers.named('path'), matches):
@ -328,4 +341,7 @@ class AnimeReleaseGroup(Rule):
to_append.append(group)
to_remove.extend(matches.range(empty_group.start, empty_group.end,
lambda m: 'weak-language' in m.tags))
return to_remove, to_append
if to_remove or to_append:
return to_remove, to_append
return False
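
Note: is_previous_match above splits the gate in two. Matches named in _scene_previous_names qualify unless tagged no-release-group-prefix (the tag carried by the new Extras pattern in episodes.py), while any other match qualifies only when tagged release-group-prefix. The same predicate rendered standalone, with an abbreviated name list:

_scene_previous_names = ('video_codec', 'source', 'container')  # abbreviated
_scene_previous_tags = ('release-group-prefix',)
_scene_no_previous_tags = ('no-release-group-prefix',)

def is_previous_match(name, tags):
    """Standalone rendering of SceneReleaseGroup.is_previous_match."""
    if name in _scene_previous_names:
        return not any(t in tags for t in _scene_no_previous_tags)
    return any(t in tags for t in _scene_previous_tags)

print(is_previous_match('video_codec', []))                     # True
print(is_previous_match('other', ['release-group-prefix']))     # True
print(is_previous_match('other', ['no-release-group-prefix']))  # False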

View file

@ -24,8 +24,8 @@ def screen_size(config):
:return: Created Rebulk object
:rtype: Rebulk
"""
interlaced = frozenset({res for res in config['interlaced']})
progressive = frozenset({res for res in config['progressive']})
interlaced = frozenset(config['interlaced'])
progressive = frozenset(config['progressive'])
frame_rates = [re.escape(rate) for rate in config['frame_rates']]
min_ar = config['min_ar']
max_ar = config['max_ar']

View file

@ -12,7 +12,7 @@ from rebulk import AppendMatch, Rebulk, RemoveMatch, Rule
from .audio_codec import HqConflictRule
from ..common import dash, seps
from ..common.pattern import is_disabled
from ..common.validators import seps_before, seps_after
from ..common.validators import seps_before, seps_after, or_
def source(config): # pylint:disable=unused-argument
@ -26,7 +26,10 @@ def source(config): # pylint:disable=unused-argument
"""
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'source'))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash], private_parent=True, children=True)
rebulk.defaults(name='source', tags=['video-codec-prefix', 'streaming_service.suffix'])
rebulk = rebulk.defaults(name='source',
tags=['video-codec-prefix', 'streaming_service.suffix'],
validate_all=True,
validator={'__parent__': or_(seps_before, seps_after)})
rip_prefix = '(?P<other>Rip)-?'
rip_suffix = '-?(?P<other>Rip)'
@ -42,7 +45,7 @@ def source(config): # pylint:disable=unused-argument
def demote_other(match, other): # pylint: disable=unused-argument
"""Default conflict solver with 'other' property."""
return other if other.name == 'other' else '__default__'
return other if other.name == 'other' or other.name == 'release_group' else '__default__'
rebulk.regex(*build_source_pattern('VHS', suffix=rip_optional_suffix),
value={'source': 'VHS', 'other': 'Rip'})
@ -92,8 +95,9 @@ def source(config): # pylint:disable=unused-argument
# WEBCap is a synonym to WEBRip, mostly used by non english
rebulk.regex(*build_source_pattern('WEB-?(?P<another>Cap)', suffix=rip_optional_suffix),
value={'source': 'Web', 'other': 'Rip', 'another': 'Rip'})
rebulk.regex(*build_source_pattern('WEB-?DL', 'WEB-?U?HD', 'WEB', 'DL-?WEB', 'DL(?=-?Mux)'),
rebulk.regex(*build_source_pattern('WEB-?DL', 'WEB-?U?HD', 'DL-?WEB', 'DL(?=-?Mux)'),
value={'source': 'Web'})
rebulk.regex('(WEB)', value='Web', tags='weak.source')
rebulk.regex(*build_source_pattern('HD-?DVD', suffix=rip_optional_suffix),
value={'source': 'HD-DVD', 'other': 'Rip'})
@ -118,7 +122,7 @@ def source(config): # pylint:disable=unused-argument
rebulk.regex(*build_source_pattern('DSR?', 'SAT', suffix=rip_suffix),
value={'source': 'Satellite', 'other': 'Rip'})
rebulk.rules(ValidateSource, UltraHdBlurayRule)
rebulk.rules(ValidateSourcePrefixSuffix, ValidateWeakSource, UltraHdBlurayRule)
return rebulk
@ -170,32 +174,62 @@ class UltraHdBlurayRule(Rule):
to_remove.append(match)
to_append.append(new_source)
return to_remove, to_append
if to_remove or to_append:
return to_remove, to_append
return False
class ValidateSource(Rule):
class ValidateSourcePrefixSuffix(Rule):
"""
Validate source with screener property, with video_codec property or separated
Validate source with source prefix, source suffix.
"""
priority = 64
consequence = RemoveMatch
def when(self, matches, context):
ret = []
for match in matches.named('source'):
match = match.initiator
if not seps_before(match) and \
not matches.range(match.start - 1, match.start - 2,
lambda m: 'source-prefix' in m.tags):
if match.children:
ret.extend(match.children)
ret.append(match)
continue
if not seps_after(match) and \
not matches.range(match.end, match.end + 1,
lambda m: 'source-suffix' in m.tags):
if match.children:
ret.extend(match.children)
ret.append(match)
continue
for filepart in matches.markers.named('path'):
for match in matches.range(filepart.start, filepart.end, predicate=lambda m: m.name == 'source'):
match = match.initiator
if not seps_before(match) and \
not matches.range(match.start - 1, match.start - 2,
lambda m: 'source-prefix' in m.tags):
if match.children:
ret.extend(match.children)
ret.append(match)
continue
if not seps_after(match) and \
not matches.range(match.end, match.end + 1,
lambda m: 'source-suffix' in m.tags):
if match.children:
ret.extend(match.children)
ret.append(match)
continue
return ret
class ValidateWeakSource(Rule):
"""
Validate weak source
"""
dependency = [ValidateSourcePrefixSuffix]
priority = 64
consequence = RemoveMatch
def when(self, matches, context):
ret = []
for filepart in matches.markers.named('path'):
for match in matches.range(filepart.start, filepart.end, predicate=lambda m: m.name == 'source'):
# if there is more than one source in this filepart, just before the year and with holes for the title,
# the source is most likely part of the title
if 'weak.source' in match.tags \
and matches.range(match.end, filepart.end, predicate=lambda m: m.name == 'source') \
and matches.holes(filepart.start, match.start,
predicate=lambda m: m.value.strip(seps), index=-1):
if match.children:
ret.extend(match.children)
ret.append(match)
continue
return ret
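
Note: bare 'WEB' is now matched separately with a 'weak.source' tag instead of being folded into the WEB-DL alternatives, and ValidateWeakSource drops it when another source follows in the same filepart and a title-sized hole precedes it. The new movie test below ("The.Girl.in.the.Spiders.Web.2019...") pins the intended effect; a quick check against the public API:

from guessit import guessit

info = guessit('The.Girl.in.the.Spiders.Web.2019.1080p.WEB-DL.x264.AC3-EVO.mkv')
print(info.get('title'))   # expected: The Girl in the Spiders Web
print(info.get('source'))  # expected: Web, taken from the trailing WEB-DL only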

View file

@ -25,133 +25,13 @@ def streaming_service(config): # pylint: disable=too-many-statements,unused-arg
rebulk = rebulk.string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
rebulk.defaults(name='streaming_service', tags=['source-prefix'])
rebulk.string('AE', 'A&E', value='A&E')
rebulk.string('AMBC', value='ABC')
rebulk.string('AUBC', value='ABC Australia')
rebulk.string('AJAZ', value='Al Jazeera English')
rebulk.string('AMC', value='AMC')
rebulk.string('AMZN', 'Amazon', value='Amazon Prime')
rebulk.regex('Amazon-?Prime', value='Amazon Prime')
rebulk.string('AS', value='Adult Swim')
rebulk.regex('Adult-?Swim', value='Adult Swim')
rebulk.string('ATK', value="America's Test Kitchen")
rebulk.string('ANPL', value='Animal Planet')
rebulk.string('ANLB', value='AnimeLab')
rebulk.string('AOL', value='AOL')
rebulk.string('ARD', value='ARD')
rebulk.string('iP', value='BBC iPlayer')
rebulk.regex('BBC-?iPlayer', value='BBC iPlayer')
rebulk.string('BRAV', value='BravoTV')
rebulk.string('CNLP', value='Canal+')
rebulk.string('CN', value='Cartoon Network')
rebulk.string('CBC', value='CBC')
rebulk.string('CBS', value='CBS')
rebulk.string('CNBC', value='CNBC')
rebulk.string('CC', value='Comedy Central')
rebulk.string('4OD', value='Channel 4')
rebulk.string('CHGD', value='CHRGD')
rebulk.string('CMAX', value='Cinemax')
rebulk.string('CMT', value='Country Music Television')
rebulk.regex('Comedy-?Central', value='Comedy Central')
rebulk.string('CCGC', value='Comedians in Cars Getting Coffee')
rebulk.string('CR', value='Crunchy Roll')
rebulk.string('CRKL', value='Crackle')
rebulk.regex('Crunchy-?Roll', value='Crunchy Roll')
rebulk.string('CSPN', value='CSpan')
rebulk.string('CTV', value='CTV')
rebulk.string('CUR', value='CuriosityStream')
rebulk.string('CWS', value='CWSeed')
rebulk.string('DSKI', value='Daisuki')
rebulk.string('DHF', value='Deadhouse Films')
rebulk.string('DDY', value='Digiturk Diledigin Yerde')
rebulk.string('DISC', 'Discovery', value='Discovery')
rebulk.string('DSNY', 'Disney', value='Disney')
rebulk.string('DIY', value='DIY Network')
rebulk.string('DOCC', value='Doc Club')
rebulk.string('DPLY', value='DPlay')
rebulk.string('ETV', value='E!')
rebulk.string('EPIX', value='ePix')
rebulk.string('ETTV', value='El Trece')
rebulk.string('ESPN', value='ESPN')
rebulk.string('ESQ', value='Esquire')
rebulk.string('FAM', value='Family')
rebulk.string('FJR', value='Family Jr')
rebulk.string('FOOD', value='Food Network')
rebulk.string('FOX', value='Fox')
rebulk.string('FREE', value='Freeform')
rebulk.string('FYI', value='FYI Network')
rebulk.string('GLBL', value='Global')
rebulk.string('GLOB', value='GloboSat Play')
rebulk.string('HLMK', value='Hallmark')
rebulk.string('HBO', value='HBO Go')
rebulk.regex('HBO-?Go', value='HBO Go')
rebulk.string('HGTV', value='HGTV')
rebulk.string('HIST', 'History', value='History')
rebulk.string('HULU', value='Hulu')
rebulk.string('ID', value='Investigation Discovery')
rebulk.string('IFC', value='IFC')
rebulk.string('iTunes', 'iT', value='iTunes')
rebulk.string('ITV', value='ITV')
rebulk.string('KNOW', value='Knowledge Network')
rebulk.string('LIFE', value='Lifetime')
rebulk.string('MTOD', value='Motor Trend OnDemand')
rebulk.string('MNBC', value='MSNBC')
rebulk.string('MTV', value='MTV')
rebulk.string('NATG', value='National Geographic')
rebulk.regex('National-?Geographic', value='National Geographic')
rebulk.string('NBA', value='NBA TV')
rebulk.regex('NBA-?TV', value='NBA TV')
rebulk.string('NBC', value='NBC')
rebulk.string('NF', 'Netflix', value='Netflix')
rebulk.string('NFL', value='NFL')
rebulk.string('NFLN', value='NFL Now')
rebulk.string('GC', value='NHL GameCenter')
rebulk.string('NICK', 'Nickelodeon', value='Nickelodeon')
rebulk.string('NRK', value='Norsk Rikskringkasting')
rebulk.string('PBS', value='PBS')
rebulk.string('PBSK', value='PBS Kids')
rebulk.string('PSN', value='Playstation Network')
rebulk.string('PLUZ', value='Pluzz')
rebulk.string('RTE', value='RTE One')
rebulk.string('SBS', value='SBS (AU)')
rebulk.string('SESO', 'SeeSo', value='SeeSo')
rebulk.string('SHMI', value='Shomi')
rebulk.string('SPIK', value='Spike')
rebulk.string('SPKE', value='Spike TV')
rebulk.regex('Spike-?TV', value='Spike TV')
rebulk.string('SNET', value='Sportsnet')
rebulk.string('SPRT', value='Sprout')
rebulk.string('STAN', value='Stan')
rebulk.string('STZ', value='Starz')
rebulk.string('SVT', value='Sveriges Television')
rebulk.string('SWER', value='SwearNet')
rebulk.string('SYFY', value='Syfy')
rebulk.string('TBS', value='TBS')
rebulk.string('TFOU', value='TFou')
rebulk.string('CW', value='The CW')
rebulk.regex('The-?CW', value='The CW')
rebulk.string('TLC', value='TLC')
rebulk.string('TUBI', value='TubiTV')
rebulk.string('TV3', value='TV3 Ireland')
rebulk.string('TV4', value='TV4 Sweeden')
rebulk.string('TVL', value='TV Land')
rebulk.regex('TV-?Land', value='TV Land')
rebulk.string('UFC', value='UFC')
rebulk.string('UKTV', value='UKTV')
rebulk.string('UNIV', value='Univision')
rebulk.string('USAN', value='USA Network')
rebulk.string('VLCT', value='Velocity')
rebulk.string('VH1', value='VH1')
rebulk.string('VICE', value='Viceland')
rebulk.string('VMEO', value='Vimeo')
rebulk.string('VRV', value='VRV')
rebulk.string('WNET', value='W Network')
rebulk.string('WME', value='WatchMe')
rebulk.string('WWEN', value='WWE Network')
rebulk.string('XBOX', value='Xbox Video')
rebulk.string('YHOO', value='Yahoo')
rebulk.string('RED', value='YouTube Red')
rebulk.string('ZDF', value='ZDF')
for value, items in config.items():
patterns = items if isinstance(items, list) else [items]
for pattern in patterns:
if pattern.startswith('re:'):
rebulk.regex(pattern, value=value)
else:
rebulk.string(pattern, value=value)
rebulk.rules(ValidateStreamingService)
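
Note: the hard-coded streaming service table above is removed in favor of configuration, using the same convention as audio_channels: keys are canonical service names, items one or more patterns, and a 're:' prefix selects a regex. A sketch of the expected shape, with illustrative entries mirroring a few of the removed patterns:

streaming_service_config = {
    'Amazon Prime': ['AMZN', 'Amazon', 're:Amazon-?Prime'],
    'BBC iPlayer': ['iP', 're:BBC-?iPlayer'],
    'Netflix': ['NF', 'Netflix'],
}

for value, items in streaming_service_config.items():
    patterns = items if isinstance(items, list) else [items]
    for pattern in patterns:
        kind = 'regex' if pattern.startswith('re:') else 'string'
        print('%-6s %-18s -> %s' % (kind, pattern, value))
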
@ -161,7 +41,7 @@ def streaming_service(config): # pylint: disable=too-many-statements,unused-arg
class ValidateStreamingService(Rule):
"""Validate streaming service matches."""
priority = 32
priority = 128
consequence = RemoveMatch
def when(self, matches, context):

View file

@ -8,7 +8,12 @@ from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, AppendTags
from rebulk.formatters import formatters
from .film import FilmTitleRule
from .language import SubtitlePrefixLanguageRule, SubtitleSuffixLanguageRule, SubtitleExtensionRule
from .language import (
SubtitlePrefixLanguageRule,
SubtitleSuffixLanguageRule,
SubtitleExtensionRule,
NON_SPECIFIC_LANGUAGES
)
from ..common import seps, title_seps
from ..common.comparators import marker_sorted
from ..common.expected import build_expected_function
@ -88,12 +93,19 @@ class TitleBaseRule(Rule):
:rtype:
"""
cropped_holes = []
group_markers = matches.markers.named('group')
for group_marker in group_markers:
path_marker = matches.markers.at_match(group_marker, predicate=lambda m: m.name == 'path', index=0)
if path_marker and path_marker.span == group_marker.span:
group_markers.remove(group_marker)
for hole in holes:
group_markers = matches.markers.named('group')
cropped_holes.extend(hole.crop(group_markers))
return cropped_holes
def is_ignored(self, match):
@staticmethod
def is_ignored(match):
"""
Ignore matches when scanning for title (hole).
@ -130,7 +142,8 @@ class TitleBaseRule(Rule):
for outside in outside_matches:
other_languages.extend(matches.range(outside.start, outside.end,
lambda c_match: c_match.name == match.name and
c_match not in to_keep))
c_match not in to_keep and
c_match.value not in NON_SPECIFIC_LANGUAGES))
if not other_languages and (not starting or len(match.raw) <= 3):
return True
@ -239,7 +252,7 @@ class TitleBaseRule(Rule):
to_remove = []
if matches.named(self.match_name, lambda match: 'expected' in match.tags):
return ret, to_remove
return False
fileparts = [filepart for filepart in list(marker_sorted(matches.markers.named('path'), matches))
if not self.filepart_filter or self.filepart_filter(filepart, matches)]
@ -272,7 +285,9 @@ class TitleBaseRule(Rule):
ret.extend(titles)
to_remove.extend(to_remove_c)
return ret, to_remove
if ret or to_remove:
return ret, to_remove
return False
class TitleFromPosition(TitleBaseRule):
@ -329,4 +344,6 @@ class PreferTitleWithYear(Rule):
for title_match in titles:
if title_match.value not in title_values:
to_remove.append(title_match)
return to_remove, to_tag
if to_remove or to_tag:
return to_remove, to_tag
return False

View file

@ -3,9 +3,8 @@
"""
video_codec and video_profile property
"""
from rebulk.remodule import re
from rebulk import Rebulk, Rule, RemoveMatch
from rebulk.remodule import re
from ..common import dash
from ..common.pattern import is_disabled
@ -43,7 +42,8 @@ def video_codec(config): # pylint:disable=unused-argument
# http://blog.mediacoderhq.com/h264-profiles-and-levels/
# https://en.wikipedia.org/wiki/H.264/MPEG-4_AVC
rebulk.defaults(name="video_profile",
rebulk.defaults(clear=True,
name="video_profile",
validator=seps_surround,
disabled=lambda context: is_disabled(context, 'video_profile'))
@ -66,7 +66,8 @@ def video_codec(config): # pylint:disable=unused-argument
rebulk.string('DXVA', value='DXVA', name='video_api',
disabled=lambda context: is_disabled(context, 'video_api'))
rebulk.defaults(name='color_depth',
rebulk.defaults(clear=True,
name='color_depth',
validator=seps_surround,
disabled=lambda context: is_disabled(context, 'color_depth'))
rebulk.regex('12.?bits?', value='12-bit')

View file

@ -67,7 +67,7 @@ def website(config):
"""
Validator for next website matches
"""
return any(name in ['season', 'episode', 'year'] for name in match.names)
return match.named('season', 'episode', 'year')
def when(self, matches, context):
to_remove = []
@ -80,7 +80,9 @@ def website(config):
if not safe:
suffix = matches.next(website_match, PreferTitleOverWebsite.valid_followers, 0)
if suffix:
to_remove.append(website_match)
group = matches.markers.at_match(website_match, lambda marker: marker.name == 'group', 0)
if not group:
to_remove.append(website_match)
return to_remove
rebulk.rules(PreferTitleOverWebsite, ValidateWebsitePrefix)

View file

@ -35,9 +35,9 @@
-cd: 1
-cd_count: 3
? This.Is.Us
? This.is.Us
: options: --exclude country
title: This Is Us
title: This is Us
-country: US
? 2015.01.31
@ -286,9 +286,9 @@
: options: --exclude website
-website: wawa.co.uk
? movie.mkv
? movie.mp4
: options: --exclude mimetype
-mimetype: video/x-matroska
-mimetype: video/mp4
? another movie.mkv
: options: --exclude container

View file

@ -201,9 +201,9 @@
? Series/My Name Is Earl/My.Name.Is.Earl.S01Extras.-.Bad.Karma.DVDRip.XviD.avi
: title: My Name Is Earl
season: 1
episode_title: Extras - Bad Karma
episode_title: Bad Karma
source: DVD
other: Rip
other: [Extras, Rip]
video_codec: Xvid
? series/Freaks And Geeks/Season 1/Episode 4 - Kim Kelly Is My Friend-eng(1).srt
@ -1917,9 +1917,11 @@
? Duck.Dynasty.S02E07.Streik.German.DOKU.DL.WS.DVDRiP.x264-CDP
: episode: 7
episode_title: Streik German
episode_title: Streik
source: DVD
language: mul
language:
- German
- Multi
other: [Documentary, Widescreen, Rip]
release_group: CDP
season: 2
@ -1930,9 +1932,11 @@
? Family.Guy.S13E14.JOLO.German.AC3D.DL.720p.WebHD.x264-CDD
: audio_codec: Dolby Digital
episode: 14
episode_title: JOLO German
episode_title: JOLO
source: Web
language: mul
language:
- German
- Multi
release_group: CDD
screen_size: 720p
season: 13
@ -3025,7 +3029,7 @@
title: Show Name
episode: [493, 494, 495, 496, 497, 498, 500, 501, 502, 503, 504, 505, 506, 507]
screen_size: 720p
subtitle_language: fr
other: Variable Frame Rate
video_codec: H.264
audio_codec: AAC
type: episode
@ -4524,4 +4528,166 @@
video_codec: H.264
audio_codec: MP2
release_group: KIDKAT
type: episode
? Por Trece Razones - Temporada 2 [HDTV 720p][Cap.201][AC3 5.1 Castellano]/Por Trece Razones 2x01 [des202].mkv
: title: Por Trece Razones
season: 2
source: HDTV
screen_size: 720p
episode: 1
audio_codec: Dolby Digital
audio_channels: '5.1'
language: Catalan
release_group: des202
container: mkv
type: episode
? Cuerpo de Elite - Temporada 1 [HDTV 720p][Cap.113][AC3 5.1 Esp Castellano]\CuerpoDeElite720p_113_desca202.mkv
: title: Cuerpo de Elite
season: 1
source: HDTV
screen_size: 720p
episode: 13
audio_codec: Dolby Digital
audio_channels: '5.1'
language:
- Spanish
- Catalan
container: mkv
type: episode
? Show.Name.S01E01.St.Patricks.Day.1080p.mkv
: title: Show Name
season: 1
episode: 1
episode_title: St Patricks Day
screen_size: 1080p
container: mkv
type: episode
? Show.Name.S01E01.St.Patricks.Day.1080p-grp.mkv
: title: Show Name
season: 1
episode: 1
episode_title: St Patricks Day
screen_size: 1080p
release_group: grp
container: mkv
type: episode
? Titans.2018.S01E09.Hank.And.Dawn.720p.DCU.WEB-DL.AAC2.0.H264-NTb
: title: Titans
year: 2018
season: 1
episode: 9
episode_title: Hank And Dawn
screen_size: 720p
streaming_service: DC Universe
source: Web
audio_codec: AAC
audio_channels: '2.0'
video_codec: H.264
release_group: NTb
type: episode
? S.W.A.T.2017.S01E21.Treibjagd.German.Dubbed.DL.AmazonHD.x264-TVS
: title: S.W.A.T.
year: 2017
season: 1
episode: 21
episode_title: Treibjagd
language:
- German
- Multi
streaming_service: Amazon Prime
other: HD
video_codec: H.264
release_group: TVS
type: episode
? S.W.A.T.2017.S01E16.READNFO.720p.HDTV.x264-KILLERS
: title: S.W.A.T.
year: 2017
season: 1
episode: 16
other: Read NFO
screen_size: 720p
source: HDTV
video_codec: H.264
release_group: KILLERS
type: episode
? /mnt/NAS/NoSubsTVShows/Babylon 5/Season 01/Ep. 02 - Soul Hunter
: title: Babylon 5
season: 1
episode: 2
episode_title: Soul Hunter
type: episode
? This.is.Us.S01E01.HDTV.x264-KILLERS.mkv
: title: This is Us
season: 1
episode: 1
source: HDTV
video_codec: H.264
release_group: KILLERS
container: mkv
type: episode
? Videos/Office1080/The Office (US) (2005) Season 2 S02 + Extras (1080p AMZN WEB-DL x265 HEVC 10bit AAC 2.0 LION)/The Office (US) (2005) - S02E12 - The Injury (1080p AMZN WEB-DL x265 LION).mkv
: title: The Office
country: US
year: 2005
season: 2
other: Extras
screen_size: 1080p
streaming_service: Amazon Prime
source: Web
video_codec: H.265
video_profile: High Efficiency Video Coding
color_depth: 10-bit
audio_codec: AAC
audio_channels: '2.0'
release_group: LION
episode: 12
episode_title: The Injury
container: mkv
type: episode
? Thumping.Spike.2.E01.DF.WEBRip.720p-DRAMATV.mp4
: title: Thumping Spike 2
episode: 1
source: Web
other: Rip
screen_size: 720p
streaming_service: DramaFever
release_group: DRAMATV
container: mp4
mimetype: video/mp4
type: episode
? About.Time.E01.1080p.VIKI.WEB-DL-BLUEBERRY.mp4
: title: About Time
episode: 1
screen_size: 1080p
streaming_service: Viki
source: Web
release_group: BLUEBERRY
container: mp4
mimetype: video/mp4
type: episode
? Eyes.Of.Dawn.1991.E01.480p.MBCVOD.AAC.x264-NOGPR.mp4
: title: Eyes Of Dawn
year: 1991
season: 1991
episode: 1
screen_size: 480p
streaming_service: MBC
audio_codec: AAC
video_codec: H.264
release_group: NOGPR
container: mp4
mimetype: video/mp4
type: episode

View file

@ -815,10 +815,12 @@
? Das.Appartement.German.AC3D.DL.720p.BluRay.x264-TVP
: audio_codec: Dolby Digital
source: Blu-ray
language: mul
language:
- German
- Multi
release_group: TVP
screen_size: 720p
title: Das Appartement German
title: Das Appartement
type: movie
video_codec: H.264
@ -1723,7 +1725,7 @@
? Ant-Man.and.the.Wasp.2018.Digital.Extras.1080p.AMZN.WEB-DL.DDP5.1.H.264-NTG.mkv
: title: Ant-Man and the Wasp
year: 2018
alternative_title: Digital Extras
other: Extras
screen_size: 1080p
streaming_service: Amazon Prime
source: Web
@ -1770,4 +1772,15 @@
audio_channels: '5.1'
video_codec: H.264
release_group: CMRG
type: movie
type: movie
? The.Girl.in.the.Spiders.Web.2019.1080p.WEB-DL.x264.AC3-EVO.mkv
: title: The Girl in the Spiders Web
year: 2019
screen_size: 1080p
source: Web
video_codec: H.264
audio_codec: Dolby Digital
release_group: EVO
container: mkv
type: movie

View file

@ -0,0 +1,467 @@
? is
: title: is
? it
: title: it
? am
: title: am
? mad
: title: mad
? men
: title: men
? man
: title: man
? run
: title: run
? sin
: title: sin
? st
: title: st
? to
: title: to
? 'no'
: title: 'no'
? non
: title: non
? war
: title: war
? min
: title: min
? new
: title: new
? car
: title: car
? day
: title: day
? bad
: title: bad
? bat
: title: bat
? fan
: title: fan
? fry
: title: fry
? cop
: title: cop
? zen
: title: zen
? gay
: title: gay
? fat
: title: fat
? one
: title: one
? cherokee
: title: cherokee
? got
: title: got
? an
: title: an
? as
: title: as
? cat
: title: cat
? her
: title: her
? be
: title: be
? hat
: title: hat
? sun
: title: sun
? may
: title: may
? my
: title: my
? mr
: title: mr
? rum
: title: rum
? pi
: title: pi
? bb
: title: bb
? bt
: title: bt
? tv
: title: tv
? aw
: title: aw
? by
: title: by
? md
: other: Mic Dubbed
? mp
: title: mp
? cd
: title: cd
? in
: title: in
? ad
: title: ad
? ice
: title: ice
? ay
: title: ay
? at
: title: at
? star
: title: star
? so
: title: so
? he
: title: he
? do
: title: do
? ax
: title: ax
? mx
: title: mx
? bas
: title: bas
? de
: title: de
? le
: title: le
? son
: title: son
? ne
: title: ne
? ca
: title: ca
? ce
: title: ce
? et
: title: et
? que
: title: que
? mal
: title: mal
? est
: title: est
? vol
: title: vol
? or
: title: or
? mon
: title: mon
? se
: title: se
? je
: title: je
? tu
: title: tu
? me
: title: me
? ma
: title: ma
? va
: title: va
? au
: country: AU
? lu
: title: lu
? wa
: title: wa
? ga
: title: ga
? ao
: title: ao
? la
: title: la
? el
: title: el
? del
: title: del
? por
: title: por
? mar
: title: mar
? al
: title: al
? un
: title: un
? ind
: title: ind
? arw
: title: arw
? ts
: source: Telesync
? ii
: title: ii
? bin
: title: bin
? chan
: title: chan
? ss
: title: ss
? san
: title: san
? oss
: title: oss
? iii
: title: iii
? vi
: title: vi
? ben
: title: ben
? da
: title: da
? lt
: title: lt
? ch
: title: ch
? sr
: title: sr
? ps
: title: ps
? cx
: title: cx
? vo
: title: vo
? mkv
: container: mkv
? avi
: container: avi
? dmd
: title: dmd
? the
: title: the
? dis
: title: dis
? cut
: title: cut
? stv
: title: stv
? des
: title: des
? dia
: title: dia
? and
: title: and
? cab
: title: cab
? sub
: title: sub
? mia
: title: mia
? rim
: title: rim
? las
: title: las
? une
: title: une
? par
: title: par
? srt
: container: srt
? ano
: title: ano
? toy
: title: toy
? job
: title: job
? gag
: title: gag
? reel
: title: reel
? www
: title: www
? for
: title: for
? ayu
: title: ayu
? csi
: title: csi
? ren
: title: ren
? moi
: title: moi
? sur
: title: sur
? fer
: title: fer
? fun
: title: fun
? two
: title: two
? big
: title: big
? psy
: title: psy
? air
: title: air
? brazil
: title: brazil
? jordan
: title: jordan
? bs
: title: bs
? kz
: title: kz
? gt
: title: gt
? im
: title: im
? pt
: language: pt
? scr
: title: scr
? sd
: title: sd
? hr
: other: High Resolution

View file

@ -5,8 +5,8 @@
: country: US
title: this is title
? This.is.us.title
: title: This is us title
? This.is.Us
: title: This is Us
? This.Is.Us
: options: --no-default-config

View file

@ -48,7 +48,7 @@
proper_count: 3
? Proper
? Proper.720p
? +Repack
? +Rerip
: other: Proper
@ -80,7 +80,7 @@
? Remux
: other: Remux
? 3D
? 3D.2019
: other: 3D
? HD

View file

@ -0,0 +1,21 @@
{
"titles": [
"13 Reasons Why",
"Star Wars: Episode VII - The Force Awakens",
"3%",
"The 100",
"3 Percent",
"This is Us",
"Open Season 2",
"Game of Thrones",
"The X-Files",
"11.22.63"
],
"suggested": [
"13 Reasons Why",
"Star Wars: Episode VII - The Force Awakens",
"The 100",
"Open Season 2",
"11.22.63"
]
}

View file

@ -1,13 +1,14 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name, pointless-string-statement
import json
import os
import sys
import pytest
import six
from ..api import guessit, properties, GuessitException
from ..api import guessit, properties, suggested_expected, GuessitException
__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
@ -27,12 +28,16 @@ def test_forced_binary():
assert ret and 'title' in ret and isinstance(ret['title'], six.binary_type)
@pytest.mark.skipif('sys.version_info < (3, 4)', reason="Path is not available")
@pytest.mark.skipif(sys.version_info < (3, 4), reason="Path is not available")
def test_pathlike_object():
from pathlib import Path
path = Path('Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv')
ret = guessit(path)
assert ret and 'title' in ret
try:
from pathlib import Path
path = Path('Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv')
ret = guessit(path)
assert ret and 'title' in ret
except ImportError: # pragma: no-cover
pass
def test_unicode_japanese():
@ -69,3 +74,10 @@ def test_exception():
assert "An internal error has occured in guessit" in str(excinfo.value)
assert "Guessit Exception Report" in str(excinfo.value)
assert "Please report at https://github.com/guessit-io/guessit/issues" in str(excinfo.value)
def test_suggested_expected():
with open(os.path.join(__location__, 'suggested.json'), 'r') as f:
content = json.load(f)
actual = suggested_expected(content['titles'])
assert actual == content['suggested']
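
The test above exercises the new suggested_expected helper. A minimal sketch of how it might be called directly, assuming the vendored package is importable as guessit; the expected output follows the suggested.json fixture above:

from guessit.api import suggested_expected

titles = ['13 Reasons Why', 'This is Us', '11.22.63']
# titles that guessit would not parse cleanly are returned, suitable
# for passing back as options={'expected_title': ...}
print(suggested_expected(titles))  # e.g. ['13 Reasons Why', '11.22.63']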

View file

@ -7,9 +7,8 @@ import os
from io import open # pylint: disable=redefined-builtin
import babelfish
import pytest
import six
import yaml
import six # pylint:disable=wrong-import-order
import yaml # pylint:disable=wrong-import-order
from rebulk.remodule import re
from rebulk.utils import is_iterable
@ -21,13 +20,6 @@ logger = logging.getLogger(__name__)
__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
filename_predicate = None
string_predicate = None
# filename_predicate = lambda filename: 'episode_title' in filename
# string_predicate = lambda string: '-DVD.BlablaBla.Fix.Blablabla.XVID' in string
class EntryResult(object):
def __init__(self, string, negates=False):
@ -134,7 +126,49 @@ class TestYml(object):
options_re = re.compile(r'^([ +-]+)(.*)')
files, ids = files_and_ids(filename_predicate)
def _get_unique_id(self, collection, base_id):
ret = base_id
i = 2
while ret in collection:
suffix = "-" + str(i)
ret = base_id + suffix
i += 1
return ret
def pytest_generate_tests(self, metafunc):
if 'yml_test_case' in metafunc.fixturenames:
entries = []
entry_ids = []
entry_set = set()
for filename, _ in zip(*files_and_ids()):
with open(os.path.join(__location__, filename), 'r', encoding='utf-8') as infile:
data = yaml.load(infile, OrderedDictYAMLLoader)
last_expected = None
for string, expected in reversed(list(data.items())):
if expected is None:
data[string] = last_expected
else:
last_expected = expected
default = None
try:
default = data['__default__']
del data['__default__']
except KeyError:
pass
for string, expected in data.items():
TestYml.set_default(expected, default)
string = TestYml.fix_encoding(string, expected)
entries.append((filename, string, expected))
unique_id = self._get_unique_id(entry_set, '[' + filename + '] ' + str(string))
entry_set.add(unique_id)
entry_ids.append(unique_id)
metafunc.parametrize('yml_test_case', entries, ids=entry_ids)
@staticmethod
def set_default(expected, default):
@ -143,34 +177,8 @@ class TestYml(object):
if k not in expected:
expected[k] = v
@pytest.mark.parametrize('filename', files, ids=ids)
def test(self, filename, caplog):
caplog.set_level(logging.INFO)
with open(os.path.join(__location__, filename), 'r', encoding='utf-8') as infile:
data = yaml.load(infile, OrderedDictYAMLLoader)
entries = Results()
last_expected = None
for string, expected in reversed(list(data.items())):
if expected is None:
data[string] = last_expected
else:
last_expected = expected
default = None
try:
default = data['__default__']
del data['__default__']
except KeyError:
pass
for string, expected in data.items():
TestYml.set_default(expected, default)
entry = self.check_data(filename, string, expected)
entries.append(entry)
entries.assert_ok()
def check_data(self, filename, string, expected):
@classmethod
def fix_encoding(cls, string, expected):
if six.PY2:
if isinstance(string, six.text_type):
string = string.encode('utf-8')
@ -183,16 +191,23 @@ class TestYml(object):
expected[k] = v
if not isinstance(string, str):
string = str(string)
if not string_predicate or string_predicate(string): # pylint: disable=not-callable
entry = self.check(string, expected)
if entry.ok:
logger.debug('[%s] %s', filename, entry)
elif entry.warning:
logger.warning('[%s] %s', filename, entry)
elif entry.error:
logger.error('[%s] %s', filename, entry)
for line in entry.details:
logger.error('[%s] %s', filename, ' ' * 4 + line)
return string
def test_entry(self, yml_test_case):
filename, string, expected = yml_test_case
result = self.check_data(filename, string, expected)
assert not result.error
def check_data(self, filename, string, expected):
entry = self.check(string, expected)
if entry.ok:
logger.debug('[%s] %s', filename, entry)
elif entry.warning:
logger.warning('[%s] %s', filename, entry)
elif entry.error:
logger.error('[%s] %s', filename, entry)
for line in entry.details:
logger.error('[%s] %s', filename, ' ' * 4 + line)
return entry
def check(self, string, expected):

View file

@ -946,3 +946,254 @@
source: Blu-ray
audio_codec: DTS-HD
type: movie
? Mr Robot - S03E01 - eps3 0 power-saver-mode h (1080p AMZN WEB-DL x265 HEVC 10bit EAC3 6.0 RCVR).mkv
: title: Mr Robot
season: 3
episode: 1
episode_title: eps3 0 power-saver-mode h
screen_size: 1080p
streaming_service: Amazon Prime
source: Web
video_codec: H.265
video_profile: High Efficiency Video Coding
color_depth: 10-bit
audio_codec: Dolby Digital Plus
audio_channels: '5.1'
release_group: RCVR
container: mkv
type: episode
? Panorama.15-05-2018.Web-DL.540p.H264.AAC.Subs.mp4
: title: Panorama
date: 2018-05-15
source: Web
screen_size: 540p
video_codec: H.264
audio_codec: AAC
subtitle_language: und
container: mp4
type: episode
? Shaolin 2011.720p.BluRay.x264-x0r.mkv
: title: Shaolin
year: 2011
screen_size: 720p
source: Blu-ray
video_codec: H.264
release_group: x0r
container: mkv
type: movie
? '[ Engineering Catastrophes S02E10 1080p AMZN WEB-DL DD+ 2.0 x264-TrollHD ]'
: title: Engineering Catastrophes
season: 2
episode: 10
screen_size: 1080p
streaming_service: Amazon Prime
source: Web
audio_codec: Dolby Digital Plus
audio_channels: '2.0'
video_codec: H.264
release_group: TrollHD
type: episode
? A Very Harold & Kumar 3D Christmas (2011).mkv
: title: A Very Harold & Kumar 3D Christmas
year: 2011
container: mkv
type: movie
? Cleveland.Hustles.S01E03.Downward.Dogs.and.Proper.Pigs.720p.HDTV.x264-W4F
: title: Cleveland Hustles
season: 1
episode: 3
episode_title: Downward Dogs and Proper Pigs
screen_size: 720p
source: HDTV
video_codec: H.264
release_group: W4F
type: episode
? Pawn.Stars.S12E20.The.Pawn.Awakens.REAL.READ.NFO.720p.HDTV.x264-DHD
: title: Pawn Stars
season: 12
episode: 20
episode_title: The Pawn Awakens
other:
- Proper
- Read NFO
proper_count: 2
screen_size: 720p
source: HDTV
video_codec: H.264
release_group: DHD
type: episode
? Pawn.Stars.S12E22.Racing.Revolution.REAL.720p.HDTV.x264-DHD
: title: Pawn Stars
season: 12
episode: 22
episode_title: Racing Revolution
other: Proper
proper_count: 2
screen_size: 720p
source: HDTV
video_codec: H.264
release_group: DHD
type: episode
? Luksusfellen.S18E02.REAL.NORWEGiAN.720p.WEB.h264-NORPiLT
: title: Luksusfellen
season: 18
episode: 2
other: Proper
proper_count: 2
language: Norwegian
screen_size: 720p
source: Web
video_codec: H.264
release_group: NORPiLT
type: episode
? The.Exorcist.S02E07.REAL.FRENCH.720p.HDTV.x264-SH0W
: title: The Exorcist
season: 2
episode: 7
other: Proper
proper_count: 2
language: fr
screen_size: 720p
source: HDTV
video_codec: H.264
release_group: SH0W
type: episode
? Outrageous.Acts.of.Science.S05E02.Is.This.for.Real.720p.HDTV.x264-DHD
: title: Outrageous Acts of Science
season: 5
episode: 2
# corner case
# episode_title: Is This for Real
screen_size: 720p
source: HDTV
video_codec: H.264
release_group: DHD
type: episode
? How.the.Universe.Works.S06E08.Strange.Lives.of.Dwarf.Planets.REAL.720p.WEB.x264-DHD
: title: How the Universe Works
season: 6
episode: 8
episode_title: Strange Lives of Dwarf Planets
other: Proper
proper_count: 2
screen_size: 720p
source: Web
video_codec: H.264
release_group: DHD
type: episode
? Vampirina.S01E16.REAL.HDTV.x264-W4F
: title: Vampirina
season: 1
episode: 16
other: Proper
proper_count: 2
source: HDTV
video_codec: H.264
release_group: W4F
type: episode
? Test.S01E16.Some Real Episode Title.HDTV.x264-W4F
: title: Test
season: 1
episode: 16
episode_title: Some Real Episode Title
source: HDTV
video_codec: H.264
release_group: W4F
type: episode
? NOS4A2.S01E01.The.Shorter.Way.REPACK.720p.AMZN.WEB-DL.DDP5.1.H.264-NTG.mkv
: title: NOS4A2
season: 1
episode: 1
episode_title: The Shorter Way
other: Proper
proper_count: 1
screen_size: 720p
streaming_service: Amazon Prime
source: Web
audio_codec: Dolby Digital Plus
audio_channels: '5.1'
video_codec: H.264
release_group: NTG
container: mkv
type: episode
? Star Trek DS9 Ep 2x03 The Siege (Part III)
: title: Star Trek DS9
season: 2
episode: 3
episode_title: The Siege
part: 3
type: episode
? The.Red.Line.S01E01
: title: The Red Line
season: 1
episode: 1
type: episode
? Show.S01E01.WEB.x264-METCON.mkv
: title: Show
season: 1
episode: 1
source: Web
video_codec: H.264
release_group: METCON
container: mkv
type: episode
? Show.S01E01.WEB.x264-TCMEON.mkv
: title: Show
season: 1
episode: 1
source: Web
video_codec: H.264
release_group: TCMEON
container: mkv
type: episode
? Show.S01E01.WEB.x264-MEONTC.mkv
: title: Show
season: 1
episode: 1
source: Web
video_codec: H.264
release_group: MEONTC
container: mkv
type: episode
? '[TorrentCouch.com].Westworld.S02.Complete.720p.WEB-DL.x264.[MP4].[5.3GB].[Season.2.Full]/[TorrentCouch.com].Westworld.S02E03.720p.WEB-DL.x264.mp4'
: website: TorrentCouch.com
title: Westworld
season: 2
other: Complete
screen_size: 720p
source: Web
video_codec: H.264
container: mp4
size: 5.3GB
episode: 3
type: episode
? Vita.&.Virginia.2018.720p.H.264.YTS.LT.mp4
: title: Vita & Virginia
year: 2018
screen_size: 720p
video_codec: H.264
release_group: YTS.LT
container: mp4
type: movie
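
Each fixture above pairs a release name (the `?` line) with the properties guessit is expected to extract (the `:` block). A minimal sketch of the corresponding API call, assuming the vendored package is importable as guessit:

from guessit import guessit

# mirrors the 'Shaolin 2011.720p.BluRay.x264-x0r.mkv' fixture above
props = guessit('Shaolin 2011.720p.BluRay.x264-x0r.mkv')
assert props['title'] == 'Shaolin'
assert props['year'] == 2011
assert props['release_group'] == 'x0r'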

View file

@ -10,19 +10,19 @@ except ImportError: # pragma: no-cover
from ordereddict import OrderedDict # pylint:disable=import-error
import babelfish
import yaml
import yaml # pylint:disable=wrong-import-order
from .rules.common.quantity import BitRate, FrameRate, Size
class OrderedDictYAMLLoader(yaml.Loader):
class OrderedDictYAMLLoader(yaml.SafeLoader):
"""
A YAML loader that loads mappings into ordered dictionaries.
From https://gist.github.com/enaeseth/844388
"""
def __init__(self, *args, **kwargs):
yaml.Loader.__init__(self, *args, **kwargs)
yaml.SafeLoader.__init__(self, *args, **kwargs)
self.add_constructor(u'tag:yaml.org,2002:map', type(self).construct_yaml_map)
self.add_constructor(u'tag:yaml.org,2002:omap', type(self).construct_yaml_map)
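
Switching the base class to yaml.SafeLoader keeps ordered mappings while refusing to construct arbitrary Python objects. A minimal usage sketch mirroring how the test suite loads its fixtures; the module path and filename here are assumptions:

import yaml
from guessit.yamlutils import OrderedDictYAMLLoader  # assumed vendored path

with open('episodes.yml', encoding='utf-8') as infile:  # illustrative filename
    data = yaml.load(infile, OrderedDictYAMLLoader)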
@ -58,7 +58,7 @@ class CustomDumper(yaml.SafeDumper):
"""
Custom YAML Dumper.
"""
pass
pass # pylint:disable=unnecessary-pass
def default_representer(dumper, data):

View file

@ -4,4 +4,4 @@
Version module
"""
# pragma: no cover
__version__ = '1.0.0'
__version__ = '2.0.1'

View file

@ -0,0 +1,217 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Base builder class for Rebulk
"""
from abc import ABCMeta, abstractmethod
from copy import deepcopy
from logging import getLogger
from six import add_metaclass
from .loose import set_defaults
from .pattern import RePattern, StringPattern, FunctionalPattern
log = getLogger(__name__).log
@add_metaclass(ABCMeta)
class Builder(object):
"""
Base builder class for patterns
"""
def __init__(self):
self._defaults = {}
self._regex_defaults = {}
self._string_defaults = {}
self._functional_defaults = {}
self._chain_defaults = {}
def reset(self):
"""
Reset all defaults.
:return:
"""
self.__init__()
def defaults(self, **kwargs):
"""
Define default keyword arguments for all patterns
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(kwargs, self._defaults, override=True)
return self
def regex_defaults(self, **kwargs):
"""
Define default keyword arguments for regex patterns.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(kwargs, self._regex_defaults, override=True)
return self
def string_defaults(self, **kwargs):
"""
Define default keyword arguments for string patterns.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(kwargs, self._string_defaults, override=True)
return self
def functional_defaults(self, **kwargs):
"""
Define default keyword arguments for functional patterns.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(kwargs, self._functional_defaults, override=True)
return self
def chain_defaults(self, **kwargs):
"""
Define default keyword arguments for patterns chain.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(kwargs, self._chain_defaults, override=True)
return self
def build_re(self, *pattern, **kwargs):
"""
Builds a new regular expression pattern
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(self._regex_defaults, kwargs)
set_defaults(self._defaults, kwargs)
return RePattern(*pattern, **kwargs)
def build_string(self, *pattern, **kwargs):
"""
Builds a new string pattern
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(self._string_defaults, kwargs)
set_defaults(self._defaults, kwargs)
return StringPattern(*pattern, **kwargs)
def build_functional(self, *pattern, **kwargs):
"""
Builds a new functional pattern
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(self._functional_defaults, kwargs)
set_defaults(self._defaults, kwargs)
return FunctionalPattern(*pattern, **kwargs)
def build_chain(self, **kwargs):
"""
Builds a new patterns chain
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
from .chain import Chain
set_defaults(self._chain_defaults, kwargs)
set_defaults(self._defaults, kwargs)
chain = Chain(self, **kwargs)
chain._defaults = deepcopy(self._defaults) # pylint: disable=protected-access
chain._regex_defaults = deepcopy(self._regex_defaults) # pylint: disable=protected-access
chain._functional_defaults = deepcopy(self._functional_defaults) # pylint: disable=protected-access
chain._string_defaults = deepcopy(self._string_defaults) # pylint: disable=protected-access
chain._chain_defaults = deepcopy(self._chain_defaults) # pylint: disable=protected-access
return chain
@abstractmethod
def pattern(self, *pattern):
"""
Register a list of Pattern instances
:param pattern:
:return:
"""
pass
def regex(self, *pattern, **kwargs):
"""
Add re pattern
:param pattern:
:type pattern:
:return: self
:rtype: Rebulk
"""
return self.pattern(self.build_re(*pattern, **kwargs))
def string(self, *pattern, **kwargs):
"""
Add string pattern
:param pattern:
:type pattern:
:return: self
:rtype: Rebulk
"""
return self.pattern(self.build_string(*pattern, **kwargs))
def functional(self, *pattern, **kwargs):
"""
Add functional pattern
:param pattern:
:type pattern:
:return: self
:rtype: Rebulk
"""
functional = self.build_functional(*pattern, **kwargs)
return self.pattern(functional)
def chain(self, **kwargs):
"""
Add patterns chain, using configuration of this rebulk
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
chain = self.build_chain(**kwargs)
self.pattern(chain)
return chain
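
The Builder base class now carries the fluent defaults API that both Rebulk and Chain inherit. A minimal sketch of the shared interface; the pattern name used here is illustrative:

from rebulk import Rebulk

# defaults declared on the builder flow into every pattern built from it
bulk = Rebulk().defaults(name='place').string('lakers').string('la')
for match in bulk.matches('the lakers are from la'):
    print(match.name, match.value)  # place lakers / place la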

View file

@ -6,9 +6,10 @@ Chain patterns and handle repeating capture groups
# pylint: disable=super-init-not-called
import itertools
from .loose import call, set_defaults
from .builder import Builder
from .loose import call
from .match import Match, Matches
from .pattern import Pattern, filter_match_kwargs
from .pattern import Pattern, filter_match_kwargs, BasePattern
from .remodule import re
@ -19,150 +20,46 @@ class _InvalidChainException(Exception):
pass
class Chain(Pattern):
class Chain(Pattern, Builder):
"""
Definition of a pattern chain to search for.
"""
def __init__(self, rebulk, chain_breaker=None, **kwargs):
call(super(Chain, self).__init__, **kwargs)
def __init__(self, parent, chain_breaker=None, **kwargs):
Builder.__init__(self)
call(Pattern.__init__, self, **kwargs)
self._kwargs = kwargs
self._match_kwargs = filter_match_kwargs(kwargs)
self._defaults = {}
self._regex_defaults = {}
self._string_defaults = {}
self._functional_defaults = {}
if callable(chain_breaker):
self.chain_breaker = chain_breaker
else:
self.chain_breaker = None
self.rebulk = rebulk
self.parent = parent
self.parts = []
def defaults(self, **kwargs):
def pattern(self, *pattern):
"""
Define default keyword arguments for all patterns
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
self._defaults = kwargs
return self
def regex_defaults(self, **kwargs):
"""
Define default keyword arguments for regex patterns.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
self._regex_defaults = kwargs
return self
def string_defaults(self, **kwargs):
"""
Define default keyword arguments for string patterns.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
self._string_defaults = kwargs
return self
def functional_defaults(self, **kwargs):
"""
Define default keyword arguments for functional patterns.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
self._functional_defaults = kwargs
return self
def chain(self):
"""
Add patterns chain, using configuration from this chain
:return:
:rtype:
"""
# pylint: disable=protected-access
chain = self.rebulk.chain(**self._kwargs)
chain._defaults = dict(self._defaults)
chain._regex_defaults = dict(self._regex_defaults)
chain._functional_defaults = dict(self._functional_defaults)
chain._string_defaults = dict(self._string_defaults)
return chain
def regex(self, *pattern, **kwargs):
"""
Add re pattern
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(self._kwargs, kwargs)
set_defaults(self._regex_defaults, kwargs)
set_defaults(self._defaults, kwargs)
pattern = self.rebulk.build_re(*pattern, **kwargs)
part = ChainPart(self, pattern)
self.parts.append(part)
return part
def functional(self, *pattern, **kwargs):
"""
Add functional pattern
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(self._kwargs, kwargs)
set_defaults(self._functional_defaults, kwargs)
set_defaults(self._defaults, kwargs)
pattern = self.rebulk.build_functional(*pattern, **kwargs)
part = ChainPart(self, pattern)
self.parts.append(part)
return part
def string(self, *pattern, **kwargs):
"""
Add string pattern
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(self._kwargs, kwargs)
set_defaults(self._functional_defaults, kwargs)
set_defaults(self._defaults, kwargs)
pattern = self.rebulk.build_string(*pattern, **kwargs)
part = ChainPart(self, pattern)
if not pattern:
raise ValueError("One pattern should be given to the chain")
if len(pattern) > 1:
raise ValueError("Only one pattern can be given to the chain")
part = ChainPart(self, pattern[0])
self.parts.append(part)
return part
def close(self):
"""
Close chain builder to continue registering other pattern
:return:
:rtype:
Deeply close the chain
:return: Rebulk instance
"""
return self.rebulk
parent = self.parent
while isinstance(parent, Chain):
parent = parent.parent
return parent
def _match(self, pattern, input_string, context=None):
# pylint: disable=too-many-locals,too-many-nested-blocks
@ -173,42 +70,20 @@ class Chain(Pattern):
chain_found = False
current_chain_matches = []
valid_chain = True
is_chain_start = True
for chain_part in self.parts:
try:
chain_part_matches, raw_chain_part_matches = Chain._match_chain_part(is_chain_start, chain_part,
chain_input_string,
context)
Chain._fix_matches_offset(chain_part_matches, input_string, offset)
Chain._fix_matches_offset(raw_chain_part_matches, input_string, offset)
if raw_chain_part_matches:
grouped_matches_dict = dict()
for match_index, match in itertools.groupby(chain_part_matches,
lambda m: m.match_index):
grouped_matches_dict[match_index] = list(match)
grouped_raw_matches_dict = dict()
for match_index, raw_match in itertools.groupby(raw_chain_part_matches,
lambda m: m.match_index):
grouped_raw_matches_dict[match_index] = list(raw_match)
for match_index, grouped_raw_matches in grouped_raw_matches_dict.items():
chain_found = True
offset = grouped_raw_matches[-1].raw_end
chain_input_string = input_string[offset:]
if not chain_part.is_hidden:
grouped_matches = grouped_matches_dict.get(match_index, [])
if self._chain_breaker_eval(current_chain_matches + grouped_matches):
current_chain_matches.extend(grouped_matches)
chain_part_matches, raw_chain_part_matches = chain_part.matches(chain_input_string,
context,
with_raw_matches=True)
chain_found, chain_input_string, offset = \
self._to_next_chain_part(chain_part, chain_part_matches, raw_chain_part_matches, chain_found,
input_string, chain_input_string, offset, current_chain_matches)
except _InvalidChainException:
valid_chain = False
if current_chain_matches:
offset = current_chain_matches[0].raw_end
break
is_chain_start = False
if not chain_found:
break
if current_chain_matches and valid_chain:
@ -217,38 +92,66 @@ class Chain(Pattern):
return chain_matches
def _match_parent(self, match, yield_parent):
def _to_next_chain_part(self, chain_part, chain_part_matches, raw_chain_part_matches, chain_found,
input_string, chain_input_string, offset, current_chain_matches):
Chain._fix_matches_offset(chain_part_matches, input_string, offset)
Chain._fix_matches_offset(raw_chain_part_matches, input_string, offset)
if raw_chain_part_matches:
grouped_matches_dict = self._group_by_match_index(chain_part_matches)
grouped_raw_matches_dict = self._group_by_match_index(raw_chain_part_matches)
for match_index, grouped_raw_matches in grouped_raw_matches_dict.items():
chain_found = True
offset = grouped_raw_matches[-1].raw_end
chain_input_string = input_string[offset:]
if not chain_part.is_hidden:
grouped_matches = grouped_matches_dict.get(match_index, [])
if self._chain_breaker_eval(current_chain_matches + grouped_matches):
current_chain_matches.extend(grouped_matches)
return chain_found, chain_input_string, offset
def _process_match(self, match, match_index, child=False):
"""
Handle a parent match
Handle a match
:param match:
:type match:
:param yield_parent:
:type yield_parent:
:param match_index:
:type match_index:
:param child:
:type child:
:return:
:rtype:
"""
ret = super(Chain, self)._match_parent(match, yield_parent)
original_children = Matches(match.children)
original_end = match.end
while not ret and match.children:
last_pattern = match.children[-1].pattern
last_pattern_children = [child for child in match.children if child.pattern == last_pattern]
last_pattern_groups_iter = itertools.groupby(last_pattern_children, lambda child: child.match_index)
last_pattern_groups = {}
for index, matches in last_pattern_groups_iter:
last_pattern_groups[index] = list(matches)
# pylint: disable=too-many-locals
ret = super(Chain, self)._process_match(match, match_index, child=child)
if ret:
return True
for index in reversed(list(last_pattern_groups)):
last_matches = list(last_pattern_groups[index])
for last_match in last_matches:
match.children.remove(last_match)
match.end = match.children[-1].end if match.children else match.start
ret = super(Chain, self)._match_parent(match, yield_parent)
if ret:
return True
match.children = original_children
match.end = original_end
return ret
if match.children:
last_pattern = match.children[-1].pattern
last_pattern_groups = self._group_by_match_index(
[child_ for child_ in match.children if child_.pattern == last_pattern]
)
if last_pattern_groups:
original_children = Matches(match.children)
original_end = match.end
for index in reversed(list(last_pattern_groups)):
last_matches = last_pattern_groups[index]
for last_match in last_matches:
match.children.remove(last_match)
match.end = match.children[-1].end if match.children else match.start
ret = super(Chain, self)._process_match(match, match_index, child=child)
if ret:
return True
match.children = original_children
match.end = original_end
return False
def _build_chain_match(self, current_chain_matches, input_string):
start = None
@ -282,46 +185,11 @@ class Chain(Pattern):
Chain._fix_matches_offset(chain_part_match.children, input_string, offset)
@staticmethod
def _match_chain_part(is_chain_start, chain_part, chain_input_string, context):
chain_part_matches, raw_chain_part_matches = chain_part.pattern.matches(chain_input_string, context,
with_raw_matches=True)
chain_part_matches = Chain._truncate_chain_part_matches(is_chain_start, chain_part_matches, chain_part,
chain_input_string)
raw_chain_part_matches = Chain._truncate_chain_part_matches(is_chain_start, raw_chain_part_matches, chain_part,
chain_input_string)
Chain._validate_chain_part_matches(raw_chain_part_matches, chain_part)
return chain_part_matches, raw_chain_part_matches
@staticmethod
def _truncate_chain_part_matches(is_chain_start, chain_part_matches, chain_part, chain_input_string):
if not chain_part_matches:
return chain_part_matches
if not is_chain_start:
separator = chain_input_string[0:chain_part_matches[0].initiator.raw_start]
if separator:
return []
j = 1
for i in range(0, len(chain_part_matches) - 1):
separator = chain_input_string[chain_part_matches[i].initiator.raw_end:
chain_part_matches[i + 1].initiator.raw_start]
if separator:
break
j += 1
truncated = chain_part_matches[:j]
if chain_part.repeater_end is not None:
truncated = [m for m in truncated if m.match_index < chain_part.repeater_end]
return truncated
@staticmethod
def _validate_chain_part_matches(chain_part_matches, chain_part):
max_match_index = -1
if chain_part_matches:
max_match_index = max([m.match_index for m in chain_part_matches])
if max_match_index + 1 < chain_part.repeater_start:
raise _InvalidChainException
def _group_by_match_index(matches):
grouped_matches_dict = dict()
for match_index, match in itertools.groupby(matches, lambda m: m.match_index):
grouped_matches_dict[match_index] = list(match)
return grouped_matches_dict
@property
def match_options(self):
@ -338,7 +206,7 @@ class Chain(Pattern):
return "<%s%s:%s>" % (self.__class__.__name__, defined, self.parts)
class ChainPart(object):
class ChainPart(BasePattern):
"""
Part of a pattern chain.
"""
@ -350,6 +218,51 @@ class ChainPart(object):
self.repeater_end = 1
self._hidden = False
@property
def _is_chain_start(self):
return self._chain.parts[0] == self
def matches(self, input_string, context=None, with_raw_matches=False):
matches, raw_matches = self.pattern.matches(input_string, context=context, with_raw_matches=True)
matches = self._truncate_repeater(matches, input_string)
raw_matches = self._truncate_repeater(raw_matches, input_string)
self._validate_repeater(raw_matches)
if with_raw_matches:
return matches, raw_matches
return matches
def _truncate_repeater(self, matches, input_string):
if not matches:
return matches
if not self._is_chain_start:
separator = input_string[0:matches[0].initiator.raw_start]
if separator:
return []
j = 1
for i in range(0, len(matches) - 1):
separator = input_string[matches[i].initiator.raw_end:
matches[i + 1].initiator.raw_start]
if separator:
break
j += 1
truncated = matches[:j]
if self.repeater_end is not None:
truncated = [m for m in truncated if m.match_index < self.repeater_end]
return truncated
def _validate_repeater(self, matches):
max_match_index = -1
if matches:
max_match_index = max([m.match_index for m in matches])
if max_match_index + 1 < self.repeater_start:
raise _InvalidChainException
def chain(self):
"""
Add patterns chain, using configuration from this chain

View file

@ -15,9 +15,19 @@ def formatters(*chained_formatters):
:return:
:rtype:
"""
def formatters_chain(input_string): # pylint:disable=missing-docstring
for chained_formatter in chained_formatters:
input_string = chained_formatter(input_string)
return input_string
return formatters_chain
def default_formatter(input_string):
"""
Default formatter
:param input_string:
:return:
"""
return input_string
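
formatters composes several formatter functions into one, applying them left to right; default_formatter is the identity used when a pattern declares none. A small sketch of the composition:

from rebulk.formatters import formatters

# strip whitespace first, then convert to int
to_int = formatters(str.strip, int)
assert to_int(' 42 ') == 42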

View file

@ -3,7 +3,7 @@
"""
Introspect rebulk object to retrieve capabilities.
"""
from abc import ABCMeta, abstractproperty
from abc import ABCMeta, abstractmethod
from collections import defaultdict
import six
@ -16,7 +16,8 @@ class Description(object):
"""
Abstract class for a description.
"""
@abstractproperty
@property
@abstractmethod
def properties(self): # pragma: no cover
"""
Properties of described object.

View file

@ -4,12 +4,12 @@
Various utility functions
"""
import sys
import inspect
from inspect import isclass
try:
from inspect import getfullargspec as getargspec
_fullargspec_supported = True
except ImportError:
_fullargspec_supported = False
@ -55,8 +55,8 @@ def call(function, *args, **kwargs):
:return: same value as default function call
:rtype: object
"""
func = constructor_args if inspect.isclass(function) else function_args
call_args, call_kwargs = func(function, *args, **kwargs)
func = constructor_args if isclass(function) else function_args
call_args, call_kwargs = func(function, *args, ignore_unused=True, **kwargs) # @see #20
return function(*call_args, **call_kwargs)
@ -145,6 +145,8 @@ if not _fullargspec_supported:
else:
call_args = args[:len(argspec.args) - (1 if constructor else 0)]
return call_args, call_kwarg
argspec_args = argspec_args_legacy
@ -215,9 +217,12 @@ def filter_index(collection, predicate=None, index=None):
return collection
def set_defaults(defaults, kwargs):
def set_defaults(defaults, kwargs, override=False):
"""
Set defaults from defaults dict to kwargs dict
:param override:
:type override:
:param defaults:
:type defaults:
:param kwargs:
@ -225,12 +230,13 @@ def set_defaults(defaults, kwargs):
:return:
:rtype:
"""
if 'clear' in defaults.keys() and defaults.pop('clear'):
kwargs.clear()
for key, value in defaults.items():
if key not in kwargs and value is not None:
if key in kwargs:
if isinstance(value, list) and isinstance(kwargs[key], list):
kwargs[key] = list(value) + kwargs[key]
elif isinstance(value, dict) and isinstance(kwargs[key], dict):
set_defaults(value, kwargs[key])
if key not in kwargs or override:
kwargs[key] = value
elif isinstance(value, list) and isinstance(kwargs[key], list):
kwargs[key] = list(value) + kwargs[key]
elif isinstance(value, dict) and isinstance(kwargs[key], dict):
set_defaults(value, kwargs[key])
elif key in kwargs and value is None:
kwargs[key] = None
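
With the reworked set_defaults, lists and dicts from defaults are merged into existing values, while scalars are only filled in when missing (or unconditionally, with override=True). A minimal sketch of the merge semantics:

from rebulk.loose import set_defaults

kwargs = {'tags': ['SxxExx'], 'flags': 2}
set_defaults({'tags': ['weak'], 'name': 'episode'}, kwargs)
# lists merge with the defaults first, missing keys are filled in,
# and the existing scalar 'flags' is left untouched
assert kwargs == {'tags': ['weak', 'SxxExx'], 'flags': 2, 'name': 'episode'}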

View file

@ -815,6 +815,24 @@ class Match(object):
return filter_index(ret, predicate, index)
def tagged(self, *tags):
"""
Check if this match has at least one of the provided tags
:param tags:
:return: True if at least one of the provided tags is present, False otherwise.
"""
return any(tag in self.tags for tag in tags)
def named(self, *names):
"""
Check if one of the children matches has one of the provided names
:param names:
:return: True if at least one child has one of the provided names, False otherwise.
"""
return any(name in self.names for name in names)
def __len__(self):
return self.end - self.start
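
A small sketch of the two new Match helpers, constructing a match directly as rebulk's own tests do; the name and tag values are illustrative:

from rebulk.match import Match

m = Match(0, 4, name='episode', tags=['SxxExx', 'weak'])
assert m.tagged('SxxExx', 'strong')   # at least one of the given tags matches
assert not m.tagged('strong')
assert not m.named('season')          # no child carries that name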

View file

@ -10,14 +10,39 @@ from abc import ABCMeta, abstractmethod, abstractproperty
import six
from . import debug
from .formatters import default_formatter
from .loose import call, ensure_list, ensure_dict
from .match import Match
from .remodule import re, REGEX_AVAILABLE
from .utils import find_all, is_iterable, get_first_defined
from .validators import allways_true
@six.add_metaclass(ABCMeta)
class Pattern(object):
class BasePattern(object):
"""
Base class for Pattern-like objects
"""
@abstractmethod
def matches(self, input_string, context=None, with_raw_matches=False):
"""
Computes all matches for a given input
:param input_string: the string to parse
:type input_string: str
:param context: the context
:type context: dict
:param with_raw_matches: should also return raw (unprocessed) matches
:type with_raw_matches: bool
:return: matches based on input_string for this pattern
:rtype: iterator[Match]
"""
pass
@six.add_metaclass(ABCMeta)
class Pattern(BasePattern):
"""
Definition of a particular pattern to search for.
"""
@ -25,7 +50,7 @@ class Pattern(object):
def __init__(self, name=None, tags=None, formatter=None, value=None, validator=None, children=False, every=False,
private_parent=False, private_children=False, private=False, private_names=None, ignore_names=None,
marker=False, format_all=False, validate_all=False, disabled=lambda context: False, log_level=None,
properties=None, post_processor=None, **kwargs):
properties=None, post_processor=None, pre_match_processor=None, post_match_processor=None, **kwargs):
"""
:param name: Name of this pattern
:type name: str
@ -66,15 +91,19 @@ class Pattern(object):
:type disabled: bool|function
:param log_lvl: Log level associated to this pattern
:type log_lvl: int
:param post_process: Post processing function
:param post_processor: Post processing function
:type post_processor: func
:param pre_match_processor: Pre match processing function
:type pre_match_processor: func
:param post_match_processor: Post match processing function
:type post_match_processor: func
"""
# pylint:disable=too-many-locals,unused-argument
self.name = name
self.tags = ensure_list(tags)
self.formatters, self._default_formatter = ensure_dict(formatter, lambda x: x)
self.formatters, self._default_formatter = ensure_dict(formatter, default_formatter)
self.values, self._default_value = ensure_dict(value, None)
self.validators, self._default_validator = ensure_dict(validator, lambda match: True)
self.validators, self._default_validator = ensure_dict(validator, allways_true)
self.every = every
self.children = children
self.private = private
@ -96,6 +125,14 @@ class Pattern(object):
self.post_processor = None
else:
self.post_processor = post_processor
if not callable(pre_match_processor):
self.pre_match_processor = None
else:
self.pre_match_processor = pre_match_processor
if not callable(post_match_processor):
self.post_match_processor = None
else:
self.post_match_processor = post_match_processor
@property
def log_level(self):
@ -106,83 +143,6 @@ class Pattern(object):
"""
return self._log_level if self._log_level is not None else debug.LOG_LEVEL
def _yield_children(self, match):
"""
Does this match have children
:param match:
:type match:
:return:
:rtype:
"""
return match.children and (self.children or self.every)
def _yield_parent(self):
"""
Should the parent match be yielded
:param match:
:type match:
:return:
:rtype:
"""
return not self.children or self.every
def _match_parent(self, match, yield_parent):
"""
Handle a parent match
:param match:
:type match:
:param yield_parent:
:type yield_parent:
:return:
:rtype:
"""
if not match or match.value == "":
return False
pattern_value = get_first_defined(self.values, [match.name, '__parent__', None],
self._default_value)
if pattern_value:
match.value = pattern_value
if yield_parent or self.format_all:
match.formatter = get_first_defined(self.formatters, [match.name, '__parent__', None],
self._default_formatter)
if yield_parent or self.validate_all:
validator = get_first_defined(self.validators, [match.name, '__parent__', None],
self._default_validator)
if validator and not validator(match):
return False
return True
def _match_child(self, child, yield_children):
"""
Handle a children match
:param child:
:type child:
:param yield_children:
:type yield_children:
:return:
:rtype:
"""
if not child or child.value == "":
return False
pattern_value = get_first_defined(self.values, [child.name, '__children__', None],
self._default_value)
if pattern_value:
child.value = pattern_value
if yield_children or self.format_all:
child.formatter = get_first_defined(self.formatters, [child.name, '__children__', None],
self._default_formatter)
if yield_children or self.validate_all:
validator = get_first_defined(self.validators, [child.name, '__children__', None],
self._default_validator)
if validator and not validator(child):
return False
return True
def matches(self, input_string, context=None, with_raw_matches=False):
"""
Computes all matches for a given input
@ -200,41 +160,168 @@ class Pattern(object):
matches = []
raw_matches = []
for pattern in self.patterns:
yield_parent = self._yield_parent()
match_index = -1
match_index = 0
for match in self._match(pattern, input_string, context):
match_index += 1
match.match_index = match_index
raw_matches.append(match)
yield_children = self._yield_children(match)
if not self._match_parent(match, yield_parent):
continue
validated = True
for child in match.children:
if not self._match_child(child, yield_children):
validated = False
break
if validated:
if self.private_parent:
match.private = True
if self.private_children:
for child in match.children:
child.private = True
if yield_parent or self.private_parent:
matches.append(match)
if yield_children or self.private_children:
for child in match.children:
child.match_index = match_index
matches.append(child)
matches = self._matches_post_process(matches)
self._matches_privatize(matches)
self._matches_ignore(matches)
matches.extend(self._process_matches(match, match_index))
match_index += 1
matches = self._post_process_matches(matches)
if with_raw_matches:
return matches, raw_matches
return matches
def _matches_post_process(self, matches):
@property
def _should_include_children(self):
"""
Check if children matches from this pattern should be included in match results.
:param match:
:type match:
:return:
:rtype:
"""
return self.children or self.every
@property
def _should_include_parent(self):
"""
Check if a match from this pattern should be included in match results.
:param match:
:type match:
:return:
:rtype:
"""
return not self.children or self.every
@staticmethod
def _match_config_property_keys(match, child=False):
if match.name:
yield match.name
if child:
yield '__children__'
else:
yield '__parent__'
yield None
@staticmethod
def _process_match_index(match, match_index):
"""
Process match index from this pattern process state.
:param match:
:return:
"""
match.match_index = match_index
def _process_match_private(self, match, child=False):
"""
Process match privacy from this pattern configuration.
:param match:
:param child:
:return:
"""
if match.name and match.name in self.private_names or \
not child and self.private_parent or \
child and self.private_children:
match.private = True
def _process_match_value(self, match, child=False):
"""
Process match value from this pattern configuration.
:param match:
:return:
"""
keys = self._match_config_property_keys(match, child=child)
pattern_value = get_first_defined(self.values, keys, self._default_value)
if pattern_value:
match.value = pattern_value
def _process_match_formatter(self, match, child=False):
"""
Process match formatter from this pattern configuration.
:param match:
:return:
"""
included = self._should_include_children if child else self._should_include_parent
if included or self.format_all:
keys = self._match_config_property_keys(match, child=child)
match.formatter = get_first_defined(self.formatters, keys, self._default_formatter)
def _process_match_validator(self, match, child=False):
"""
Process match validation from this pattern configuration.
:param match:
:return: True if match is validated by the configured validator, False otherwise.
"""
included = self._should_include_children if child else self._should_include_parent
if included or self.validate_all:
keys = self._match_config_property_keys(match, child=child)
validator = get_first_defined(self.validators, keys, self._default_validator)
if validator and not validator(match):
return False
return True
def _process_match(self, match, match_index, child=False):
"""
Process match from this pattern by setting all properties from defined configuration
(index, private, value, formatter, validator, ...).
:param match:
:type match:
:return: True if match is validated by the configured validator, False otherwise.
:rtype:
"""
self._process_match_index(match, match_index)
self._process_match_private(match, child)
self._process_match_value(match, child)
self._process_match_formatter(match, child)
return self._process_match_validator(match, child)
@staticmethod
def _process_match_processor(match, processor):
if processor:
ret = processor(match)
if ret is not None:
return ret
return match
def _process_matches(self, match, match_index):
"""
Process and generate all matches for the given unprocessed match.
:param match:
:param match_index:
:return: Processed and dispatched matches.
"""
match = self._process_match_processor(match, self.pre_match_processor)
if not match:
return
if not self._process_match(match, match_index):
return
for child in match.children:
if not self._process_match(child, match_index, child=True):
return
match = self._process_match_processor(match, self.post_match_processor)
if not match:
return
if (self._should_include_parent or self.private_parent) and match.name not in self.ignore_names:
yield match
if self._should_include_children or self.private_children:
children = [x for x in match.children if x.name not in self.ignore_names]
for child in children:
yield child
def _post_process_matches(self, matches):
"""
Post process matches with user defined function
:param matches:
@ -246,32 +333,6 @@ class Pattern(object):
return self.post_processor(matches, self)
return matches
def _matches_privatize(self, matches):
"""
Mark matches included in private_names with private flag.
:param matches:
:type matches:
:return:
:rtype:
"""
if self.private_names:
for match in matches:
if match.name in self.private_names:
match.private = True
def _matches_ignore(self, matches):
"""
Ignore matches included in ignore_names.
:param matches:
:type matches:
:return:
:rtype:
"""
if self.ignore_names:
for match in list(matches):
if match.name in self.ignore_names:
matches.remove(match)
@abstractproperty
def patterns(self): # pragma: no cover
"""
@ -306,7 +367,7 @@ class Pattern(object):
@abstractmethod
def _match(self, pattern, input_string, context=None): # pragma: no cover
"""
Computes all matches for a given pattern and input
Computes all unprocessed matches for a given pattern and input.
:param pattern: the pattern to use
:param input_string: the string to parse
@ -350,7 +411,9 @@ class StringPattern(Pattern):
def _match(self, pattern, input_string, context=None):
for index in find_all(input_string, pattern, **self._kwargs):
yield Match(index, index + len(pattern), pattern=self, input_string=input_string, **self._match_kwargs)
match = Match(index, index + len(pattern), pattern=self, input_string=input_string, **self._match_kwargs)
if match:
yield match
class RePattern(Pattern):
@ -411,15 +474,18 @@ class RePattern(Pattern):
for start, end in match_object.spans(i):
child_match = Match(start, end, name=name, parent=main_match, pattern=self,
input_string=input_string, **self._children_match_kwargs)
main_match.children.append(child_match)
if child_match:
main_match.children.append(child_match)
else:
start, end = match_object.span(i)
if start > -1 and end > -1:
child_match = Match(start, end, name=name, parent=main_match, pattern=self,
input_string=input_string, **self._children_match_kwargs)
main_match.children.append(child_match)
if child_match:
main_match.children.append(child_match)
yield main_match
if main_match:
yield main_match
class FunctionalPattern(Pattern):
@ -457,14 +523,18 @@ class FunctionalPattern(Pattern):
if self._match_kwargs:
options = self._match_kwargs.copy()
options.update(args)
yield Match(pattern=self, input_string=input_string, **options)
match = Match(pattern=self, input_string=input_string, **options)
if match:
yield match
else:
kwargs = self._match_kwargs
if isinstance(args[-1], dict):
kwargs = dict(kwargs)
kwargs.update(args[-1])
args = args[:-1]
yield Match(*args, pattern=self, input_string=input_string, **kwargs)
match = Match(*args, pattern=self, input_string=input_string, **kwargs)
if match:
yield match
def filter_match_kwargs(kwargs, children=False):
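
The new pre_match_processor / post_match_processor hooks run around _process_match: per _process_match_processor above, returning None keeps the match unchanged, a falsy value drops it, and a new Match replaces it. A minimal sketch under those assumptions, using the default Rebulk pipeline:

from rebulk import Rebulk

def drop_short(match):
    # discard matches shorter than three characters; falling through
    # (returning None) keeps the other matches as-is
    if len(match) < 3:
        return False

matches = Rebulk().regex(r'\w+', pre_match_processor=drop_short).matches('to be or not')
assert [m.value for m in matches] == ['not']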

View file

@ -5,20 +5,16 @@ Entry point functions and classes for Rebulk
"""
from logging import getLogger
from .builder import Builder
from .match import Matches
from .pattern import RePattern, StringPattern, FunctionalPattern
from .chain import Chain
from .processors import ConflictSolver, PrivateRemover
from .loose import set_defaults
from .utils import extend_safe
from .rules import Rules
from .utils import extend_safe
log = getLogger(__name__).log
class Rebulk(object):
class Rebulk(Builder):
r"""
Regular expression, string and function based patterns are declared in a ``Rebulk`` object. It uses a fluent API to
chain ``string``, ``regex``, and ``functional`` methods to define various patterns types.
@ -44,6 +40,7 @@ class Rebulk(object):
>>> bulk.matches("the lakers are from la")
[<lakers:(4, 10)>, <la:(20, 22)>]
"""
# pylint:disable=protected-access
def __init__(self, disabled=lambda context: False, default_rules=True):
@ -56,6 +53,7 @@ class Rebulk(object):
:return:
:rtype:
"""
super(Rebulk, self).__init__()
if not callable(disabled):
self.disabled = lambda context: disabled
else:
@ -64,11 +62,6 @@ class Rebulk(object):
self._rules = Rules()
if default_rules:
self.rules(ConflictSolver, PrivateRemover)
self._defaults = {}
self._regex_defaults = {}
self._string_defaults = {}
self._functional_defaults = {}
self._chain_defaults = {}
self._rebulks = []
def pattern(self, *pattern):
@ -83,172 +76,6 @@ class Rebulk(object):
self._patterns.extend(pattern)
return self
def defaults(self, **kwargs):
"""
Define default keyword arguments for all patterns
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
self._defaults = kwargs
return self
def regex_defaults(self, **kwargs):
"""
Define default keyword arguments for regex patterns.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
self._regex_defaults = kwargs
return self
def regex(self, *pattern, **kwargs):
"""
Add re pattern
:param pattern:
:type pattern:
:return: self
:rtype: Rebulk
"""
self.pattern(self.build_re(*pattern, **kwargs))
return self
def build_re(self, *pattern, **kwargs):
"""
Builds a new regular expression pattern
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(self._regex_defaults, kwargs)
set_defaults(self._defaults, kwargs)
return RePattern(*pattern, **kwargs)
def string_defaults(self, **kwargs):
"""
Define default keyword arguments for string patterns.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
self._string_defaults = kwargs
return self
def string(self, *pattern, **kwargs):
"""
Add string pattern
:param pattern:
:type pattern:
:return: self
:rtype: Rebulk
"""
self.pattern(self.build_string(*pattern, **kwargs))
return self
def build_string(self, *pattern, **kwargs):
"""
Builds a new string pattern
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(self._string_defaults, kwargs)
set_defaults(self._defaults, kwargs)
return StringPattern(*pattern, **kwargs)
def functional_defaults(self, **kwargs):
"""
Define default keyword arguments for functional patterns.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
self._functional_defaults = kwargs
return self
def functional(self, *pattern, **kwargs):
"""
Add functional pattern
:param pattern:
:type pattern:
:return: self
:rtype: Rebulk
"""
self.pattern(self.build_functional(*pattern, **kwargs))
return self
def build_functional(self, *pattern, **kwargs):
"""
Builds a new functional pattern
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(self._functional_defaults, kwargs)
set_defaults(self._defaults, kwargs)
return FunctionalPattern(*pattern, **kwargs)
def chain_defaults(self, **kwargs):
"""
Define default keyword arguments for patterns chain.
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
self._chain_defaults = kwargs
return self
def chain(self, **kwargs):
"""
Add patterns chain, using configuration of this rebulk
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
chain = self.build_chain(**kwargs)
self._patterns.append(chain)
return chain
def build_chain(self, **kwargs):
"""
Builds a new patterns chain
:param pattern:
:type pattern:
:param kwargs:
:type kwargs:
:return:
:rtype:
"""
set_defaults(self._chain_defaults, kwargs)
set_defaults(self._defaults, kwargs)
return Chain(self, **kwargs)
def rules(self, *rules):
"""
Add rules as a module, class or instance.

View file

@ -2,11 +2,11 @@
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, no-member, len-as-condition
import re
from functools import partial
from rebulk.pattern import FunctionalPattern, StringPattern, RePattern
from ..rebulk import Rebulk
from ..validators import chars_surround
from ..rebulk import Rebulk, FunctionalPattern, RePattern, StringPattern
def test_chain_close():
@ -63,18 +63,61 @@ def test_build_chain():
def test_chain_defaults():
rebulk = Rebulk()
rebulk.defaults(validator=lambda x: True, ignore_names=['testIgnore'], children=True)
rebulk.defaults(validator=lambda x: x.value.startswith('t'), ignore_names=['testIgnore'], children=True)
rebulk.chain()\
rebulk.chain() \
.regex("(?P<test>test)") \
.regex(" ").repeater("*") \
.regex("(?P<best>best)") \
.regex(" ").repeater("*") \
.regex("(?P<testIgnore>testIgnore)")
matches = rebulk.matches("test testIgnore")
matches = rebulk.matches("test best testIgnore")
assert len(matches) == 1
assert matches[0].name == "test"
def test_chain_with_validators():
def chain_validator(match):
return match.value.startswith('t') and match.value.endswith('t')
def default_validator(match):
return match.value.startswith('t') and match.value.endswith('g')
def custom_validator(match):
return match.value.startswith('b') and match.value.endswith('t')
rebulk = Rebulk()
rebulk.defaults(children=True, validator=default_validator)
rebulk.chain(validate_all=True, validator={'__parent__': chain_validator}) \
.regex("(?P<test>testing)", validator=default_validator).repeater("+") \
.regex(" ").repeater("+") \
.regex("(?P<best>best)", validator=custom_validator).repeater("+")
matches = rebulk.matches("some testing best end")
assert len(matches) == 2
assert matches[0].name == "test"
assert matches[1].name == "best"
def test_matches_docs():
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE) \
.defaults(children=True, formatter={'episode': int, 'version': int}) \
.chain() \
.regex(r'e(?P<episode>\d{1,4})').repeater(1) \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'[ex-](?P<episode>\d{1,4})').repeater('*') \
.close() # .repeater(1) could be omitted as it's the default behavior
result = rebulk.matches("This is E14v2-15-16-17").to_dict() # converts matches to dict
assert 'episode' in result
assert result['episode'] == [14, 15, 16, 17]
assert 'version' in result
assert result['version'] == 2
def test_matches():
rebulk = Rebulk()
@ -144,8 +187,8 @@ def test_matches():
def test_matches_2():
rebulk = Rebulk() \
.regex_defaults(flags=re.IGNORECASE) \
.chain(children=True, formatter={'episode': int}) \
.defaults(formatter={'version': int}) \
.defaults(children=True, formatter={'episode': int, 'version': int}) \
.chain() \
.regex(r'e(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'[ex-](?P<episode>\d{1,4})').repeater('*') \
@ -173,25 +216,32 @@ def test_matches_2():
def test_matches_3():
alt_dash = (r'@', r'[\W_]') # abbreviation
rebulk = Rebulk()
match_names = ['season', 'episode']
other_names = ['screen_size', 'video_codec', 'audio_codec', 'audio_channels', 'container', 'date']
rebulk.chain(formatter={'season': int, 'episode': int},
tags=['SxxExx'],
abbreviations=[alt_dash],
private_names=['episodeSeparator', 'seasonSeparator'],
children=True,
private_parent=True,
conflict_solver=lambda match, other: match
if match.name in ['season', 'episode'] and other.name in
['screen_size', 'video_codec', 'audio_codec',
'audio_channels', 'container', 'date']
else '__default__') \
rebulk = Rebulk()
rebulk.defaults(formatter={'season': int, 'episode': int},
tags=['SxxExx'],
abbreviations=[alt_dash],
private_names=['episodeSeparator', 'seasonSeparator'],
children=True,
private_parent=True,
conflict_solver=lambda match, other: match
if match.name in match_names and other.name in other_names
else '__default__')
rebulk.chain() \
.defaults(children=True, private_parent=True) \
.regex(r'(?P<season>\d+)@?x@?(?P<episode>\d+)') \
.regex(r'(?P<episodeSeparator>x|-|\+|&)(?P<episode>\d+)').repeater('*') \
.close() \
.chain() \
.defaults(children=True, private_parent=True) \
.regex(r'S(?P<season>\d+)@?(?:xE|Ex|E|x)@?(?P<episode>\d+)') \
.regex(r'(?:(?P<episodeSeparator>xE|Ex|E|x|-|\+|&)(?P<episode>\d+))').repeater('*') \
.close() \
.chain() \
.defaults(children=True, private_parent=True) \
.regex(r'S(?P<season>\d+)') \
.regex(r'(?P<seasonSeparator>S|-|\+|&)(?P<season>\d+)').repeater('*')
@ -240,11 +290,11 @@ def test_matches_4():
rebulk = Rebulk()
rebulk.regex_defaults(flags=re.IGNORECASE)
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
validator={'__parent__': seps_surround}, children=True, private_parent=True)
rebulk.defaults(validate_all=True, children=True)
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], private_parent=True)
rebulk.chain(formatter={'episode': int, 'version': int}) \
.defaults(validator=None) \
rebulk.chain(validator={'__parent__': seps_surround}, formatter={'episode': int, 'version': int}) \
.defaults(formatter={'episode': int, 'version': int}) \
.regex(r'e(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')
@ -262,11 +312,11 @@ def test_matches_5():
rebulk = Rebulk()
rebulk.regex_defaults(flags=re.IGNORECASE)
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
validator={'__parent__': seps_surround}, children=True, private_parent=True)
rebulk.chain(formatter={'episode': int, 'version': int}) \
.defaults(validator=None) \
rebulk.chain(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
validator={'__parent__': seps_surround}, children=True, private_parent=True,
formatter={'episode': int, 'version': int}) \
.defaults(children=True, private_parent=True) \
.regex(r'e(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('{2,3}')
@ -288,7 +338,7 @@ def test_matches_6():
validator=None, children=True, private_parent=True)
rebulk.chain(formatter={'episode': int, 'version': int}) \
.defaults(validator=None) \
.defaults(children=True, private_parent=True) \
.regex(r'e(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('{2,3}')

View file

@ -2,19 +2,15 @@
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, protected-access, invalid-name, len-as-condition
from .default_rules_module import RuleRemove0
from .. import debug
from ..match import Match
from ..pattern import StringPattern
from ..rebulk import Rebulk
from ..match import Match
from .. import debug
from .default_rules_module import RuleRemove0
class TestDebug(object):
#request.addfinalizer(disable_debug)
# request.addfinalizer(disable_debug)
debug.DEBUG = True
pattern = StringPattern(1, 3, value="es")
@ -38,43 +34,43 @@ class TestDebug(object):
debug.DEBUG = False
def test_pattern(self):
assert self.pattern.defined_at.lineno == 20
assert self.pattern.defined_at.lineno > 0
assert self.pattern.defined_at.name == 'rebulk.test.test_debug'
assert self.pattern.defined_at.filename.endswith('test_debug.py')
assert str(self.pattern.defined_at) == 'test_debug.py#L20'
assert repr(self.pattern) == '<StringPattern@test_debug.py#L20:(1, 3)>'
assert str(self.pattern.defined_at).startswith('test_debug.py#L')
assert repr(self.pattern).startswith('<StringPattern@test_debug.py#L')
def test_match(self):
assert self.match.defined_at.lineno == 22
assert self.match.defined_at.lineno > 0
assert self.match.defined_at.name == 'rebulk.test.test_debug'
assert self.match.defined_at.filename.endswith('test_debug.py')
assert str(self.match.defined_at) == 'test_debug.py#L22'
assert str(self.match.defined_at).startswith('test_debug.py#L')
def test_rule(self):
assert self.rule.defined_at.lineno == 23
assert self.rule.defined_at.lineno > 0
assert self.rule.defined_at.name == 'rebulk.test.test_debug'
assert self.rule.defined_at.filename.endswith('test_debug.py')
assert str(self.rule.defined_at) == 'test_debug.py#L23'
assert repr(self.rule) == '<RuleRemove0@test_debug.py#L23>'
assert str(self.rule.defined_at).startswith('test_debug.py#L')
assert repr(self.rule).startswith('<RuleRemove0@test_debug.py#L')
def test_rebulk(self):
"""
This test fails on Travis CI; can't figure out why there's a 1-line offset ...
"""
assert self.rebulk._patterns[0].defined_at.lineno in [26, 27]
assert self.rebulk._patterns[0].defined_at.lineno > 0
assert self.rebulk._patterns[0].defined_at.name == 'rebulk.test.test_debug'
assert self.rebulk._patterns[0].defined_at.filename.endswith('test_debug.py')
assert str(self.rebulk._patterns[0].defined_at) in ['test_debug.py#L26', 'test_debug.py#L27']
assert str(self.rebulk._patterns[0].defined_at).startswith('test_debug.py#L')
assert self.rebulk._patterns[1].defined_at.lineno in [27, 28]
assert self.rebulk._patterns[1].defined_at.lineno > 0
assert self.rebulk._patterns[1].defined_at.name == 'rebulk.test.test_debug'
assert self.rebulk._patterns[1].defined_at.filename.endswith('test_debug.py')
assert str(self.rebulk._patterns[1].defined_at) in ['test_debug.py#L27', 'test_debug.py#L28']
assert str(self.rebulk._patterns[1].defined_at).startswith('test_debug.py#L')
assert self.matches[0].defined_at == self.rebulk._patterns[0].defined_at
assert self.matches[1].defined_at == self.rebulk._patterns[1].defined_at

View file

@ -116,6 +116,9 @@ class TestMatchesClass(object):
assert "tag1" in matches.tags
assert "tag2" in matches.tags
assert self.match3.tagged("tag1")
assert not self.match3.tagged("start")
tag1 = matches.tagged("tag1")
assert len(tag1) == 2
assert tag1[0] == self.match2

View file

@ -62,9 +62,20 @@ def validators(*chained_validators):
:return:
:rtype:
"""
def validator_chain(match): # pylint:disable=missing-docstring
for chained_validator in chained_validators:
if not chained_validator(match):
return False
return True
return validator_chain
def allways_true(match): # pylint:disable=unused-argument
"""
A validator which is always true
:param match:
:return:
"""
return True
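
validators mirrors formatters: it combines several predicates into one that accepts a match only if every chained validator does, as exercised by test_chain_with_validators above. A minimal sketch; the predicates here are illustrative:

from rebulk import Rebulk
from rebulk.validators import validators

starts_t = lambda match: match.value.startswith('t')
ends_t = lambda match: match.value.endswith('t')

# only words that both start and end with 't' are kept
rebulk = Rebulk().regex(r'\w+', validator=validators(starts_t, ends_t))
assert [m.value for m in rebulk.matches('test tea stop')] == ['test']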