Update guessit to 3.0.3

Also updates:
- babelfish-0.5.5
- python-dateutil-2.7.5
- rebulk-1.0.0
- six-1.12.0
author Labrys of Knossos 2018-12-15 00:55:30 -05:00
parent 05b0fb498f
commit 2eb9d9dc7c
120 changed files with 17964 additions and 4530 deletions

libs/bin/guessit.exe Normal file
Binary file not shown.

libs/dateutil/__init__.py
@@ -1,9 +1,8 @@
"""
Copyright (c) 2003-2010 Gustavo Niemeyer <gustavo@niemeyer.net>
# -*- coding: utf-8 -*-
try:
from ._version import version as __version__
except ImportError:
__version__ = 'unknown'
This module offers extensions to the standard python 2.3+
datetime module.
"""
__author__ = "Gustavo Niemeyer <gustavo@niemeyer.net>"
__license__ = "PSF License"
__version__ = "1.5"
__all__ = ['easter', 'parser', 'relativedelta', 'rrule', 'tz',
'utils', 'zoneinfo']

libs/dateutil/_common.py Normal file
@@ -0,0 +1,43 @@
"""
Common code used in multiple modules.
"""
class weekday(object):
__slots__ = ["weekday", "n"]
def __init__(self, weekday, n=None):
self.weekday = weekday
self.n = n
def __call__(self, n):
if n == self.n:
return self
else:
return self.__class__(self.weekday, n)
def __eq__(self, other):
try:
if self.weekday != other.weekday or self.n != other.n:
return False
except AttributeError:
return False
return True
def __hash__(self):
return hash((
self.weekday,
self.n,
))
def __ne__(self, other):
return not (self == other)
def __repr__(self):
s = ("MO", "TU", "WE", "TH", "FR", "SA", "SU")[self.weekday]
if not self.n:
return s
else:
return "%s(%+d)" % (s, self.n)
# vim:ts=4:sw=4:et
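
As a quick behavioral sketch of the new helper (an editor's illustration, assuming only the weekday class above):

>>> MO = weekday(0)
>>> MO(-1)                       # calling with n returns a new instance
MO(-1)
>>> MO(-1) == weekday(0, -1)     # equality compares both weekday and n
True
>>> repr(MO)                     # n of None prints the bare name
'MO'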

libs/dateutil/_version.py Normal file
@@ -0,0 +1,4 @@
# coding: utf-8
# file generated by setuptools_scm
# don't change, don't track in version control
version = '2.7.5'

libs/dateutil/easter.py
@@ -1,19 +1,17 @@
# -*- coding: utf-8 -*-
"""
Copyright (c) 2003-2007 Gustavo Niemeyer <gustavo@niemeyer.net>
This module offers extensions to the standard python 2.3+
datetime module.
This module offers a generic easter computing method for any given year, using
Western, Orthodox or Julian algorithms.
"""
__author__ = "Gustavo Niemeyer <gustavo@niemeyer.net>"
__license__ = "PSF License"
import datetime
__all__ = ["easter", "EASTER_JULIAN", "EASTER_ORTHODOX", "EASTER_WESTERN"]
EASTER_JULIAN = 1
EASTER_JULIAN = 1
EASTER_ORTHODOX = 2
EASTER_WESTERN = 3
EASTER_WESTERN = 3
def easter(year, method=EASTER_WESTERN):
"""
@@ -25,7 +23,7 @@ def easter(year, method=EASTER_WESTERN):
This algorithm implements three different easter
calculation methods:
1 - Original calculation in Julian calendar, valid in
dates after 326 AD
2 - Original method, with date converted to Gregorian
@@ -35,24 +33,24 @@ def easter(year, method=EASTER_WESTERN):
These methods are represented by the constants:
EASTER_JULIAN = 1
EASTER_ORTHODOX = 2
EASTER_WESTERN = 3
* ``EASTER_JULIAN = 1``
* ``EASTER_ORTHODOX = 2``
* ``EASTER_WESTERN = 3``
The default method is method 3.
More about the algorithm may be found at:
http://users.chariot.net.au/~gmarts/eastalg.htm
`GM Arts: Easter Algorithms <http://www.gmarts.org/index.php?go=415>`_
and
http://www.tondering.dk/claus/calendar.html
`The Calendar FAQ: Easter <https://www.tondering.dk/claus/cal/easter.php>`_
"""
if not (1 <= method <= 3):
raise ValueError, "invalid method"
raise ValueError("invalid method")
# g - Golden year - 1
# c - Century
@@ -69,24 +67,23 @@ def easter(year, method=EASTER_WESTERN):
e = 0
if method < 3:
# Old method
i = (19*g+15)%30
j = (y+y//4+i)%7
i = (19*g + 15) % 30
j = (y + y//4 + i) % 7
if method == 2:
# Extra dates to convert Julian to Gregorian date
e = 10
if y > 1600:
e = e+y//100-16-(y//100-16)//4
e = e + y//100 - 16 - (y//100 - 16)//4
else:
# New method
c = y//100
h = (c-c//4-(8*c+13)//25+19*g+15)%30
i = h-(h//28)*(1-(h//28)*(29//(h+1))*((21-g)//11))
j = (y+y//4+i+2-c+c//4)%7
h = (c - c//4 - (8*c + 13)//25 + 19*g + 15) % 30
i = h - (h//28)*(1 - (h//28)*(29//(h + 1))*((21 - g)//11))
j = (y + y//4 + i + 2 - c + c//4) % 7
# p can be from -6 to 56 corresponding to dates 22 March to 23 May
# (later dates apply to method 2, although 23 May never actually occurs)
p = i-j+e
d = 1+(p+27+(p+6)//40)%31
m = 3+(p+26)//30
return datetime.date(int(y),int(m),int(d))
p = i - j + e
d = 1 + (p + 27 + (p + 6)//40) % 31
m = 3 + (p + 26)//30
return datetime.date(int(y), int(m), int(d))
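
A short usage sketch of the updated function (an editor's illustration; the Gregorian values are well known):

>>> from dateutil.easter import easter, EASTER_ORTHODOX
>>> easter(2019)                    # Western method (default)
datetime.date(2019, 4, 21)
>>> easter(2019, EASTER_ORTHODOX)   # Orthodox method
datetime.date(2019, 4, 28)
>>> easter(2019, method=4)
Traceback (most recent call last):
  ...
ValueError: invalid method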

libs/dateutil/parser.py
@@ -1,886 +0,0 @@
# -*- coding:iso-8859-1 -*-
"""
Copyright (c) 2003-2007 Gustavo Niemeyer <gustavo@niemeyer.net>
This module offers extensions to the standard python 2.3+
datetime module.
"""
__author__ = "Gustavo Niemeyer <gustavo@niemeyer.net>"
__license__ = "PSF License"
import datetime
import string
import time
import sys
import os
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
import relativedelta
import tz
__all__ = ["parse", "parserinfo"]
# Some pointers:
#
# http://www.cl.cam.ac.uk/~mgk25/iso-time.html
# http://www.iso.ch/iso/en/prods-services/popstds/datesandtime.html
# http://www.w3.org/TR/NOTE-datetime
# http://ringmaster.arc.nasa.gov/tools/time_formats.html
# http://search.cpan.org/author/MUIR/Time-modules-2003.0211/lib/Time/ParseDate.pm
# http://stein.cshl.org/jade/distrib/docs/java.text.SimpleDateFormat.html
class _timelex(object):
def __init__(self, instream):
if isinstance(instream, basestring):
instream = StringIO(instream)
self.instream = instream
self.wordchars = ('abcdfeghijklmnopqrstuvwxyz'
'ABCDEFGHIJKLMNOPQRSTUVWXYZ_'
'ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ')
self.numchars = '0123456789'
self.whitespace = ' \t\r\n'
self.charstack = []
self.tokenstack = []
self.eof = False
def get_token(self):
if self.tokenstack:
return self.tokenstack.pop(0)
seenletters = False
token = None
state = None
wordchars = self.wordchars
numchars = self.numchars
whitespace = self.whitespace
while not self.eof:
if self.charstack:
nextchar = self.charstack.pop(0)
else:
nextchar = self.instream.read(1)
while nextchar == '\x00':
nextchar = self.instream.read(1)
if not nextchar:
self.eof = True
break
elif not state:
token = nextchar
if nextchar in wordchars:
state = 'a'
elif nextchar in numchars:
state = '0'
elif nextchar in whitespace:
token = ' '
break # emit token
else:
break # emit token
elif state == 'a':
seenletters = True
if nextchar in wordchars:
token += nextchar
elif nextchar == '.':
token += nextchar
state = 'a.'
else:
self.charstack.append(nextchar)
break # emit token
elif state == '0':
if nextchar in numchars:
token += nextchar
elif nextchar == '.':
token += nextchar
state = '0.'
else:
self.charstack.append(nextchar)
break # emit token
elif state == 'a.':
seenletters = True
if nextchar == '.' or nextchar in wordchars:
token += nextchar
elif nextchar in numchars and token[-1] == '.':
token += nextchar
state = '0.'
else:
self.charstack.append(nextchar)
break # emit token
elif state == '0.':
if nextchar == '.' or nextchar in numchars:
token += nextchar
elif nextchar in wordchars and token[-1] == '.':
token += nextchar
state = 'a.'
else:
self.charstack.append(nextchar)
break # emit token
if (state in ('a.', '0.') and
(seenletters or token.count('.') > 1 or token[-1] == '.')):
l = token.split('.')
token = l[0]
for tok in l[1:]:
self.tokenstack.append('.')
if tok:
self.tokenstack.append(tok)
return token
def __iter__(self):
return self
def next(self):
token = self.get_token()
if token is None:
raise StopIteration
return token
def split(cls, s):
return list(cls(s))
split = classmethod(split)
class _resultbase(object):
def __init__(self):
for attr in self.__slots__:
setattr(self, attr, None)
def _repr(self, classname):
l = []
for attr in self.__slots__:
value = getattr(self, attr)
if value is not None:
l.append("%s=%s" % (attr, `value`))
return "%s(%s)" % (classname, ", ".join(l))
def __repr__(self):
return self._repr(self.__class__.__name__)
class parserinfo(object):
# m from a.m/p.m, t from ISO T separator
JUMP = [" ", ".", ",", ";", "-", "/", "'",
"at", "on", "and", "ad", "m", "t", "of",
"st", "nd", "rd", "th"]
WEEKDAYS = [("Mon", "Monday"),
("Tue", "Tuesday"),
("Wed", "Wednesday"),
("Thu", "Thursday"),
("Fri", "Friday"),
("Sat", "Saturday"),
("Sun", "Sunday")]
MONTHS = [("Jan", "January"),
("Feb", "February"),
("Mar", "March"),
("Apr", "April"),
("May", "May"),
("Jun", "June"),
("Jul", "July"),
("Aug", "August"),
("Sep", "September"),
("Oct", "October"),
("Nov", "November"),
("Dec", "December")]
HMS = [("h", "hour", "hours"),
("m", "minute", "minutes"),
("s", "second", "seconds")]
AMPM = [("am", "a"),
("pm", "p")]
UTCZONE = ["UTC", "GMT", "Z"]
PERTAIN = ["of"]
TZOFFSET = {}
def __init__(self, dayfirst=False, yearfirst=False):
self._jump = self._convert(self.JUMP)
self._weekdays = self._convert(self.WEEKDAYS)
self._months = self._convert(self.MONTHS)
self._hms = self._convert(self.HMS)
self._ampm = self._convert(self.AMPM)
self._utczone = self._convert(self.UTCZONE)
self._pertain = self._convert(self.PERTAIN)
self.dayfirst = dayfirst
self.yearfirst = yearfirst
self._year = time.localtime().tm_year
self._century = self._year//100*100
def _convert(self, lst):
dct = {}
for i in range(len(lst)):
v = lst[i]
if isinstance(v, tuple):
for v in v:
dct[v.lower()] = i
else:
dct[v.lower()] = i
return dct
def jump(self, name):
return name.lower() in self._jump
def weekday(self, name):
if len(name) >= 3:
try:
return self._weekdays[name.lower()]
except KeyError:
pass
return None
def month(self, name):
if len(name) >= 3:
try:
return self._months[name.lower()]+1
except KeyError:
pass
return None
def hms(self, name):
try:
return self._hms[name.lower()]
except KeyError:
return None
def ampm(self, name):
try:
return self._ampm[name.lower()]
except KeyError:
return None
def pertain(self, name):
return name.lower() in self._pertain
def utczone(self, name):
return name.lower() in self._utczone
def tzoffset(self, name):
if name in self._utczone:
return 0
return self.TZOFFSET.get(name)
def convertyear(self, year):
if year < 100:
year += self._century
if abs(year-self._year) >= 50:
if year < self._year:
year += 100
else:
year -= 100
return year
def validate(self, res):
# move to info
if res.year is not None:
res.year = self.convertyear(res.year)
if res.tzoffset == 0 and not res.tzname or res.tzname == 'Z':
res.tzname = "UTC"
res.tzoffset = 0
elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
res.tzoffset = 0
return True
class parser(object):
def __init__(self, info=None):
self.info = info or parserinfo()
def parse(self, timestr, default=None,
ignoretz=False, tzinfos=None,
**kwargs):
if not default:
default = datetime.datetime.now().replace(hour=0, minute=0,
second=0, microsecond=0)
res = self._parse(timestr, **kwargs)
if res is None:
raise ValueError, "unknown string format"
repl = {}
for attr in ["year", "month", "day", "hour",
"minute", "second", "microsecond"]:
value = getattr(res, attr)
if value is not None:
repl[attr] = value
ret = default.replace(**repl)
if res.weekday is not None and not res.day:
ret = ret+relativedelta.relativedelta(weekday=res.weekday)
if not ignoretz:
if callable(tzinfos) or tzinfos and res.tzname in tzinfos:
if callable(tzinfos):
tzdata = tzinfos(res.tzname, res.tzoffset)
else:
tzdata = tzinfos.get(res.tzname)
if isinstance(tzdata, datetime.tzinfo):
tzinfo = tzdata
elif isinstance(tzdata, basestring):
tzinfo = tz.tzstr(tzdata)
elif isinstance(tzdata, int):
tzinfo = tz.tzoffset(res.tzname, tzdata)
else:
raise ValueError, "offset must be tzinfo subclass, " \
"tz string, or int offset"
ret = ret.replace(tzinfo=tzinfo)
elif res.tzname and res.tzname in time.tzname:
ret = ret.replace(tzinfo=tz.tzlocal())
elif res.tzoffset == 0:
ret = ret.replace(tzinfo=tz.tzutc())
elif res.tzoffset:
ret = ret.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset))
return ret
class _result(_resultbase):
__slots__ = ["year", "month", "day", "weekday",
"hour", "minute", "second", "microsecond",
"tzname", "tzoffset"]
def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False):
info = self.info
if dayfirst is None:
dayfirst = info.dayfirst
if yearfirst is None:
yearfirst = info.yearfirst
res = self._result()
l = _timelex.split(timestr)
try:
# year/month/day list
ymd = []
# Index of the month string in ymd
mstridx = -1
len_l = len(l)
i = 0
while i < len_l:
# Check if it's a number
try:
value_repr = l[i]
value = float(value_repr)
except ValueError:
value = None
if value is not None:
# Token is a number
len_li = len(l[i])
i += 1
if (len(ymd) == 3 and len_li in (2, 4)
and (i >= len_l or (l[i] != ':' and
info.hms(l[i]) is None))):
# 19990101T23[59]
s = l[i-1]
res.hour = int(s[:2])
if len_li == 4:
res.minute = int(s[2:])
elif len_li == 6 or (len_li > 6 and l[i-1].find('.') == 6):
# YYMMDD or HHMMSS[.ss]
s = l[i-1]
if not ymd and l[i-1].find('.') == -1:
ymd.append(info.convertyear(int(s[:2])))
ymd.append(int(s[2:4]))
ymd.append(int(s[4:]))
else:
# 19990101T235959[.59]
res.hour = int(s[:2])
res.minute = int(s[2:4])
res.second, res.microsecond = _parsems(s[4:])
elif len_li == 8:
# YYYYMMDD
s = l[i-1]
ymd.append(int(s[:4]))
ymd.append(int(s[4:6]))
ymd.append(int(s[6:]))
elif len_li in (12, 14):
# YYYYMMDDhhmm[ss]
s = l[i-1]
ymd.append(int(s[:4]))
ymd.append(int(s[4:6]))
ymd.append(int(s[6:8]))
res.hour = int(s[8:10])
res.minute = int(s[10:12])
if len_li == 14:
res.second = int(s[12:])
elif ((i < len_l and info.hms(l[i]) is not None) or
(i+1 < len_l and l[i] == ' ' and
info.hms(l[i+1]) is not None)):
# HH[ ]h or MM[ ]m or SS[.ss][ ]s
if l[i] == ' ':
i += 1
idx = info.hms(l[i])
while True:
if idx == 0:
res.hour = int(value)
if value%1:
res.minute = int(60*(value%1))
elif idx == 1:
res.minute = int(value)
if value%1:
res.second = int(60*(value%1))
elif idx == 2:
res.second, res.microsecond = \
_parsems(value_repr)
i += 1
if i >= len_l or idx == 2:
break
# 12h00
try:
value_repr = l[i]
value = float(value_repr)
except ValueError:
break
else:
i += 1
idx += 1
if i < len_l:
newidx = info.hms(l[i])
if newidx is not None:
idx = newidx
elif i+1 < len_l and l[i] == ':':
# HH:MM[:SS[.ss]]
res.hour = int(value)
i += 1
value = float(l[i])
res.minute = int(value)
if value%1:
res.second = int(60*(value%1))
i += 1
if i < len_l and l[i] == ':':
res.second, res.microsecond = _parsems(l[i+1])
i += 2
elif i < len_l and l[i] in ('-', '/', '.'):
sep = l[i]
ymd.append(int(value))
i += 1
if i < len_l and not info.jump(l[i]):
try:
# 01-01[-01]
ymd.append(int(l[i]))
except ValueError:
# 01-Jan[-01]
value = info.month(l[i])
if value is not None:
ymd.append(value)
assert mstridx == -1
mstridx = len(ymd)-1
else:
return None
i += 1
if i < len_l and l[i] == sep:
# We have three members
i += 1
value = info.month(l[i])
if value is not None:
ymd.append(value)
mstridx = len(ymd)-1
assert mstridx == -1
else:
ymd.append(int(l[i]))
i += 1
elif i >= len_l or info.jump(l[i]):
if i+1 < len_l and info.ampm(l[i+1]) is not None:
# 12 am
res.hour = int(value)
if res.hour < 12 and info.ampm(l[i+1]) == 1:
res.hour += 12
elif res.hour == 12 and info.ampm(l[i+1]) == 0:
res.hour = 0
i += 1
else:
# Year, month or day
ymd.append(int(value))
i += 1
elif info.ampm(l[i]) is not None:
# 12am
res.hour = int(value)
if res.hour < 12 and info.ampm(l[i]) == 1:
res.hour += 12
elif res.hour == 12 and info.ampm(l[i]) == 0:
res.hour = 0
i += 1
elif not fuzzy:
return None
else:
i += 1
continue
# Check weekday
value = info.weekday(l[i])
if value is not None:
res.weekday = value
i += 1
continue
# Check month name
value = info.month(l[i])
if value is not None:
ymd.append(value)
assert mstridx == -1
mstridx = len(ymd)-1
i += 1
if i < len_l:
if l[i] in ('-', '/'):
# Jan-01[-99]
sep = l[i]
i += 1
ymd.append(int(l[i]))
i += 1
if i < len_l and l[i] == sep:
# Jan-01-99
i += 1
ymd.append(int(l[i]))
i += 1
elif (i+3 < len_l and l[i] == l[i+2] == ' '
and info.pertain(l[i+1])):
# Jan of 01
# In this case, 01 is clearly year
try:
value = int(l[i+3])
except ValueError:
# Wrong guess
pass
else:
# Convert it here to become unambiguous
ymd.append(info.convertyear(value))
i += 4
continue
# Check am/pm
value = info.ampm(l[i])
if value is not None:
if value == 1 and res.hour < 12:
res.hour += 12
elif value == 0 and res.hour == 12:
res.hour = 0
i += 1
continue
# Check for a timezone name
if (res.hour is not None and len(l[i]) <= 5 and
res.tzname is None and res.tzoffset is None and
not [x for x in l[i] if x not in string.ascii_uppercase]):
res.tzname = l[i]
res.tzoffset = info.tzoffset(res.tzname)
i += 1
# Check for something like GMT+3, or BRST+3. Notice
# that it doesn't mean "I am 3 hours after GMT", but
# "my time +3 is GMT". If found, we reverse the
# logic so that timezone parsing code will get it
# right.
if i < len_l and l[i] in ('+', '-'):
l[i] = ('+', '-')[l[i] == '+']
res.tzoffset = None
if info.utczone(res.tzname):
# With something like GMT+3, the timezone
# is *not* GMT.
res.tzname = None
continue
# Check for a numbered timezone
if res.hour is not None and l[i] in ('+', '-'):
signal = (-1,1)[l[i] == '+']
i += 1
len_li = len(l[i])
if len_li == 4:
# -0300
res.tzoffset = int(l[i][:2])*3600+int(l[i][2:])*60
elif i+1 < len_l and l[i+1] == ':':
# -03:00
res.tzoffset = int(l[i])*3600+int(l[i+2])*60
i += 2
elif len_li <= 2:
# -[0]3
res.tzoffset = int(l[i][:2])*3600
else:
return None
i += 1
res.tzoffset *= signal
# Look for a timezone name between parenthesis
if (i+3 < len_l and
info.jump(l[i]) and l[i+1] == '(' and l[i+3] == ')' and
3 <= len(l[i+2]) <= 5 and
not [x for x in l[i+2]
if x not in string.ascii_uppercase]):
# -0300 (BRST)
res.tzname = l[i+2]
i += 4
continue
# Check jumps
if not (info.jump(l[i]) or fuzzy):
return None
i += 1
# Process year/month/day
len_ymd = len(ymd)
if len_ymd > 3:
# More than three members!?
return None
elif len_ymd == 1 or (mstridx != -1 and len_ymd == 2):
# One member, or two members with a month string
if mstridx != -1:
res.month = ymd[mstridx]
del ymd[mstridx]
if len_ymd > 1 or mstridx == -1:
if ymd[0] > 31:
res.year = ymd[0]
else:
res.day = ymd[0]
elif len_ymd == 2:
# Two members with numbers
if ymd[0] > 31:
# 99-01
res.year, res.month = ymd
elif ymd[1] > 31:
# 01-99
res.month, res.year = ymd
elif dayfirst and ymd[1] <= 12:
# 13-01
res.day, res.month = ymd
else:
# 01-13
res.month, res.day = ymd
if len_ymd == 3:
# Three members
if mstridx == 0:
res.month, res.day, res.year = ymd
elif mstridx == 1:
if ymd[0] > 31 or (yearfirst and ymd[2] <= 31):
# 99-Jan-01
res.year, res.month, res.day = ymd
else:
# 01-Jan-01
# Give precendence to day-first, since
# two-digit years is usually hand-written.
res.day, res.month, res.year = ymd
elif mstridx == 2:
# WTF!?
if ymd[1] > 31:
# 01-99-Jan
res.day, res.year, res.month = ymd
else:
# 99-01-Jan
res.year, res.day, res.month = ymd
else:
if ymd[0] > 31 or \
(yearfirst and ymd[1] <= 12 and ymd[2] <= 31):
# 99-01-01
res.year, res.month, res.day = ymd
elif ymd[0] > 12 or (dayfirst and ymd[1] <= 12):
# 13-01-01
res.day, res.month, res.year = ymd
else:
# 01-13-01
res.month, res.day, res.year = ymd
except (IndexError, ValueError, AssertionError):
return None
if not info.validate(res):
return None
return res
DEFAULTPARSER = parser()
def parse(timestr, parserinfo=None, **kwargs):
if parserinfo:
return parser(parserinfo).parse(timestr, **kwargs)
else:
return DEFAULTPARSER.parse(timestr, **kwargs)
class _tzparser(object):
class _result(_resultbase):
__slots__ = ["stdabbr", "stdoffset", "dstabbr", "dstoffset",
"start", "end"]
class _attr(_resultbase):
__slots__ = ["month", "week", "weekday",
"yday", "jyday", "day", "time"]
def __repr__(self):
return self._repr("")
def __init__(self):
_resultbase.__init__(self)
self.start = self._attr()
self.end = self._attr()
def parse(self, tzstr):
res = self._result()
l = _timelex.split(tzstr)
try:
len_l = len(l)
i = 0
while i < len_l:
# BRST+3[BRDT[+2]]
j = i
while j < len_l and not [x for x in l[j]
if x in "0123456789:,-+"]:
j += 1
if j != i:
if not res.stdabbr:
offattr = "stdoffset"
res.stdabbr = "".join(l[i:j])
else:
offattr = "dstoffset"
res.dstabbr = "".join(l[i:j])
i = j
if (i < len_l and
(l[i] in ('+', '-') or l[i][0] in "0123456789")):
if l[i] in ('+', '-'):
# Yes, that's right. See the TZ variable
# documentation.
signal = (1,-1)[l[i] == '+']
i += 1
else:
signal = -1
len_li = len(l[i])
if len_li == 4:
# -0300
setattr(res, offattr,
(int(l[i][:2])*3600+int(l[i][2:])*60)*signal)
elif i+1 < len_l and l[i+1] == ':':
# -03:00
setattr(res, offattr,
(int(l[i])*3600+int(l[i+2])*60)*signal)
i += 2
elif len_li <= 2:
# -[0]3
setattr(res, offattr,
int(l[i][:2])*3600*signal)
else:
return None
i += 1
if res.dstabbr:
break
else:
break
if i < len_l:
for j in range(i, len_l):
if l[j] == ';': l[j] = ','
assert l[i] == ','
i += 1
if i >= len_l:
pass
elif (8 <= l.count(',') <= 9 and
not [y for x in l[i:] if x != ','
for y in x if y not in "0123456789"]):
# GMT0BST,3,0,30,3600,10,0,26,7200[,3600]
for x in (res.start, res.end):
x.month = int(l[i])
i += 2
if l[i] == '-':
value = int(l[i+1])*-1
i += 1
else:
value = int(l[i])
i += 2
if value:
x.week = value
x.weekday = (int(l[i])-1)%7
else:
x.day = int(l[i])
i += 2
x.time = int(l[i])
i += 2
if i < len_l:
if l[i] in ('-','+'):
signal = (-1,1)[l[i] == "+"]
i += 1
else:
signal = 1
res.dstoffset = (res.stdoffset+int(l[i]))*signal
elif (l.count(',') == 2 and l[i:].count('/') <= 2 and
not [y for x in l[i:] if x not in (',','/','J','M',
'.','-',':')
for y in x if y not in "0123456789"]):
for x in (res.start, res.end):
if l[i] == 'J':
# non-leap year day (1 based)
i += 1
x.jyday = int(l[i])
elif l[i] == 'M':
# month[-.]week[-.]weekday
i += 1
x.month = int(l[i])
i += 1
assert l[i] in ('-', '.')
i += 1
x.week = int(l[i])
if x.week == 5:
x.week = -1
i += 1
assert l[i] in ('-', '.')
i += 1
x.weekday = (int(l[i])-1)%7
else:
# year day (zero based)
x.yday = int(l[i])+1
i += 1
if i < len_l and l[i] == '/':
i += 1
# start time
len_li = len(l[i])
if len_li == 4:
# -0300
x.time = (int(l[i][:2])*3600+int(l[i][2:])*60)
elif i+1 < len_l and l[i+1] == ':':
# -03:00
x.time = int(l[i])*3600+int(l[i+2])*60
i += 2
if i+1 < len_l and l[i+1] == ':':
i += 2
x.time += int(l[i])
elif len_li <= 2:
# -[0]3
x.time = (int(l[i][:2])*3600)
else:
return None
i += 1
assert i == len_l or l[i] == ','
i += 1
assert i >= len_l
except (IndexError, ValueError, AssertionError):
return None
return res
DEFAULTTZPARSER = _tzparser()
def _parsetz(tzstr):
return DEFAULTTZPARSER.parse(tzstr)
def _parsems(value):
"""Parse a I[.F] seconds value into (seconds, microseconds)."""
if "." not in value:
return int(value), 0
else:
i, f = value.split(".")
return int(i), int(f.ljust(6, "0")[:6])
# vim:ts=4:sw=4:et
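
For reference, the fractional-second handling this now-removed helper implemented, as a doctest-style sketch (editor's illustration):

>>> _parsems("21")
(21, 0)
>>> _parsems("21.5")    # fraction is left-justified to six digits
(21, 500000)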

libs/dateutil/parser/__init__.py Normal file
@@ -0,0 +1,60 @@
# -*- coding: utf-8 -*-
from ._parser import parse, parser, parserinfo
from ._parser import DEFAULTPARSER, DEFAULTTZPARSER
from ._parser import UnknownTimezoneWarning
from ._parser import __doc__
from .isoparser import isoparser, isoparse
__all__ = ['parse', 'parser', 'parserinfo',
'isoparse', 'isoparser',
'UnknownTimezoneWarning']
###
# Deprecate portions of the private interface so that downstream code that
# is improperly relying on it is given *some* notice.
def __deprecated_private_func(f):
from functools import wraps
import warnings
msg = ('{name} is a private function and may break without warning, '
'it will be moved and or renamed in future versions.')
msg = msg.format(name=f.__name__)
@wraps(f)
def deprecated_func(*args, **kwargs):
warnings.warn(msg, DeprecationWarning)
return f(*args, **kwargs)
return deprecated_func
def __deprecate_private_class(c):
import warnings
msg = ('{name} is a private class and may break without warning, '
'it will be moved and or renamed in future versions.')
msg = msg.format(name=c.__name__)
class private_class(c):
__doc__ = c.__doc__
def __init__(self, *args, **kwargs):
warnings.warn(msg, DeprecationWarning)
super(private_class, self).__init__(*args, **kwargs)
private_class.__name__ = c.__name__
return private_class
from ._parser import _timelex, _resultbase
from ._parser import _tzparser, _parsetz
_timelex = __deprecate_private_class(_timelex)
_tzparser = __deprecate_private_class(_tzparser)
_resultbase = __deprecate_private_class(_resultbase)
_parsetz = __deprecated_private_func(_parsetz)
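
A minimal sketch of what these wrappers do, assuming python-dateutil 2.7.5 (the public parse function is untouched; the private names now warn on use):

>>> import warnings
>>> from dateutil.parser import parse, _timelex
>>> parse("2018-12-15T00:55:30")
datetime.datetime(2018, 12, 15, 0, 55, 30)
>>> with warnings.catch_warnings(record=True) as w:
...     warnings.simplefilter("always")
...     tokens = list(_timelex("10:30"))
>>> tokens
['10', ':', '30']
>>> w[0].category.__name__
'DeprecationWarning'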

File diff suppressed because it is too large.

libs/dateutil/parser/isoparser.py Normal file
@@ -0,0 +1,406 @@
# -*- coding: utf-8 -*-
"""
This module offers a parser for ISO-8601 strings
It is intended to support all valid date, time and datetime formats per the
ISO-8601 specification.
.. versionadded:: 2.7.0
"""
from datetime import datetime, timedelta, time, date
import calendar
from dateutil import tz
from functools import wraps
import re
import six
__all__ = ["isoparse", "isoparser"]
def _takes_ascii(f):
@wraps(f)
def func(self, str_in, *args, **kwargs):
# If it's a stream, read the whole thing
str_in = getattr(str_in, 'read', lambda: str_in)()
# If it's unicode, turn it into bytes, since ISO-8601 only covers ASCII
if isinstance(str_in, six.text_type):
# ASCII is the same in UTF-8
try:
str_in = str_in.encode('ascii')
except UnicodeEncodeError as e:
msg = 'ISO-8601 strings should contain only ASCII characters'
six.raise_from(ValueError(msg), e)
return f(self, str_in, *args, **kwargs)
return func
class isoparser(object):
def __init__(self, sep=None):
"""
:param sep:
A single character that separates date and time portions. If
``None``, the parser will accept any single character.
For strict ISO-8601 adherence, pass ``'T'``.
"""
if sep is not None:
if (len(sep) != 1 or ord(sep) >= 128 or sep in '0123456789'):
raise ValueError('Separator must be a single, non-numeric ' +
'ASCII character')
sep = sep.encode('ascii')
self._sep = sep
@_takes_ascii
def isoparse(self, dt_str):
"""
Parse an ISO-8601 datetime string into a :class:`datetime.datetime`.
An ISO-8601 datetime string consists of a date portion, followed
optionally by a time portion - the date and time portions are separated
by a single character separator, which is ``T`` in the official
standard. Incomplete date formats (such as ``YYYY-MM``) may *not* be
combined with a time portion.
Supported date formats are:
Common:
- ``YYYY``
- ``YYYY-MM`` or ``YYYYMM``
- ``YYYY-MM-DD`` or ``YYYYMMDD``
Uncommon:
- ``YYYY-Www`` or ``YYYYWww`` - ISO week (day defaults to 0)
- ``YYYY-Www-D`` or ``YYYYWwwD`` - ISO week and day
The ISO week and day numbering follows the same logic as
:func:`datetime.date.isocalendar`.
Supported time formats are:
- ``hh``
- ``hh:mm`` or ``hhmm``
- ``hh:mm:ss`` or ``hhmmss``
- ``hh:mm:ss.sss`` or ``hh:mm:ss.ssssss`` (3-6 sub-second digits)
Midnight is a special case for `hh`, as the standard supports both
00:00 and 24:00 as a representation.
.. caution::
Support for fractional components other than seconds is part of the
ISO-8601 standard, but is not currently implemented in this parser.
Supported time zone offset formats are:
- `Z` (UTC)
- `±HH:MM`
- `±HHMM`
- `±HH`
Offsets will be represented as :class:`dateutil.tz.tzoffset` objects,
with the exception of UTC, which will be represented as
:class:`dateutil.tz.tzutc`. Time zone offsets equivalent to UTC (such
as `+00:00`) will also be represented as :class:`dateutil.tz.tzutc`.
:param dt_str:
A string or stream containing only an ISO-8601 datetime string
:return:
Returns a :class:`datetime.datetime` representing the string.
Unspecified components default to their lowest value.
.. warning::
As of version 2.7.0, the strictness of the parser should not be
considered a stable part of the contract. Any valid ISO-8601 string
that parses correctly with the default settings will continue to
parse correctly in future versions, but invalid strings that
currently fail (e.g. ``2017-01-01T00:00+00:00:00``) are not
guaranteed to continue failing in future versions if they encode
a valid date.
.. versionadded:: 2.7.0
"""
components, pos = self._parse_isodate(dt_str)
if len(dt_str) > pos:
if self._sep is None or dt_str[pos:pos + 1] == self._sep:
components += self._parse_isotime(dt_str[pos + 1:])
else:
raise ValueError('String contains unknown ISO components')
return datetime(*components)
@_takes_ascii
def parse_isodate(self, datestr):
"""
Parse the date portion of an ISO string.
:param datestr:
The string portion of an ISO string, without a separator
:return:
Returns a :class:`datetime.date` object
"""
components, pos = self._parse_isodate(datestr)
if pos < len(datestr):
raise ValueError('String contains unknown ISO ' +
'components: {}'.format(datestr))
return date(*components)
@_takes_ascii
def parse_isotime(self, timestr):
"""
Parse the time portion of an ISO string.
:param timestr:
The time portion of an ISO string, without a separator
:return:
Returns a :class:`datetime.time` object
"""
return time(*self._parse_isotime(timestr))
@_takes_ascii
def parse_tzstr(self, tzstr, zero_as_utc=True):
"""
Parse a valid ISO time zone string.
See :func:`isoparser.isoparse` for details on supported formats.
:param tzstr:
A string representing an ISO time zone offset
:param zero_as_utc:
Whether to return :class:`dateutil.tz.tzutc` for zero-offset zones
:return:
Returns :class:`dateutil.tz.tzoffset` for offsets and
:class:`dateutil.tz.tzutc` for ``Z`` and (if ``zero_as_utc`` is
specified) offsets equivalent to UTC.
"""
return self._parse_tzstr(tzstr, zero_as_utc=zero_as_utc)
# Constants
_MICROSECOND_END_REGEX = re.compile(b'[-+Z]+')
_DATE_SEP = b'-'
_TIME_SEP = b':'
_MICRO_SEP = b'.'
def _parse_isodate(self, dt_str):
try:
return self._parse_isodate_common(dt_str)
except ValueError:
return self._parse_isodate_uncommon(dt_str)
def _parse_isodate_common(self, dt_str):
len_str = len(dt_str)
components = [1, 1, 1]
if len_str < 4:
raise ValueError('ISO string too short')
# Year
components[0] = int(dt_str[0:4])
pos = 4
if pos >= len_str:
return components, pos
has_sep = dt_str[pos:pos + 1] == self._DATE_SEP
if has_sep:
pos += 1
# Month
if len_str - pos < 2:
raise ValueError('Invalid common month')
components[1] = int(dt_str[pos:pos + 2])
pos += 2
if pos >= len_str:
if has_sep:
return components, pos
else:
raise ValueError('Invalid ISO format')
if has_sep:
if dt_str[pos:pos + 1] != self._DATE_SEP:
raise ValueError('Invalid separator in ISO string')
pos += 1
# Day
if len_str - pos < 2:
raise ValueError('Invalid common day')
components[2] = int(dt_str[pos:pos + 2])
return components, pos + 2
def _parse_isodate_uncommon(self, dt_str):
if len(dt_str) < 4:
raise ValueError('ISO string too short')
# All ISO formats start with the year
year = int(dt_str[0:4])
has_sep = dt_str[4:5] == self._DATE_SEP
pos = 4 + has_sep # Skip '-' if it's there
if dt_str[pos:pos + 1] == b'W':
# YYYY-?Www-?D?
pos += 1
weekno = int(dt_str[pos:pos + 2])
pos += 2
dayno = 1
if len(dt_str) > pos:
if (dt_str[pos:pos + 1] == self._DATE_SEP) != has_sep:
raise ValueError('Inconsistent use of dash separator')
pos += has_sep
dayno = int(dt_str[pos:pos + 1])
pos += 1
base_date = self._calculate_weekdate(year, weekno, dayno)
else:
# YYYYDDD or YYYY-DDD
if len(dt_str) - pos < 3:
raise ValueError('Invalid ordinal day')
ordinal_day = int(dt_str[pos:pos + 3])
pos += 3
if ordinal_day < 1 or ordinal_day > (365 + calendar.isleap(year)):
raise ValueError('Invalid ordinal day' +
' {} for year {}'.format(ordinal_day, year))
base_date = date(year, 1, 1) + timedelta(days=ordinal_day - 1)
components = [base_date.year, base_date.month, base_date.day]
return components, pos
def _calculate_weekdate(self, year, week, day):
"""
Calculate the day corresponding to the ISO year-week-day calendar.
This function is effectively the inverse of
:func:`datetime.date.isocalendar`.
:param year:
The year in the ISO calendar
:param week:
The week in the ISO calendar - range is [1, 53]
:param day:
The day in the ISO calendar - range is [1 (MON), 7 (SUN)]
:return:
Returns a :class:`datetime.date`
"""
if not 0 < week < 54:
raise ValueError('Invalid week: {}'.format(week))
if not 0 < day < 8: # Range is 1-7
raise ValueError('Invalid weekday: {}'.format(day))
# Get week 1 for the specific year:
jan_4 = date(year, 1, 4) # Week 1 always has January 4th in it
week_1 = jan_4 - timedelta(days=jan_4.isocalendar()[2] - 1)
# Now add the specific number of weeks and days to get what we want
week_offset = (week - 1) * 7 + (day - 1)
return week_1 + timedelta(days=week_offset)
def _parse_isotime(self, timestr):
len_str = len(timestr)
components = [0, 0, 0, 0, None]
pos = 0
comp = -1
if len(timestr) < 2:
raise ValueError('ISO time too short')
has_sep = len_str >= 3 and timestr[2:3] == self._TIME_SEP
while pos < len_str and comp < 5:
comp += 1
if timestr[pos:pos + 1] in b'-+Z':
# Detect time zone boundary
components[-1] = self._parse_tzstr(timestr[pos:])
pos = len_str
break
if comp < 3:
# Hour, minute, second
components[comp] = int(timestr[pos:pos + 2])
pos += 2
if (has_sep and pos < len_str and
timestr[pos:pos + 1] == self._TIME_SEP):
pos += 1
if comp == 3:
# Microsecond
if timestr[pos:pos + 1] != self._MICRO_SEP:
continue
pos += 1
us_str = self._MICROSECOND_END_REGEX.split(timestr[pos:pos + 6],
1)[0]
components[comp] = int(us_str) * 10**(6 - len(us_str))
pos += len(us_str)
if pos < len_str:
raise ValueError('Unused components in ISO string')
if components[0] == 24:
# Standard supports 00:00 and 24:00 as representations of midnight
if any(component != 0 for component in components[1:4]):
raise ValueError('Hour may only be 24 at 24:00:00.000')
components[0] = 0
return components
def _parse_tzstr(self, tzstr, zero_as_utc=True):
if tzstr == b'Z':
return tz.tzutc()
if len(tzstr) not in {3, 5, 6}:
raise ValueError('Time zone offset must be 1, 3, 5 or 6 characters')
if tzstr[0:1] == b'-':
mult = -1
elif tzstr[0:1] == b'+':
mult = 1
else:
raise ValueError('Time zone offset requires sign')
hours = int(tzstr[1:3])
if len(tzstr) == 3:
minutes = 0
else:
minutes = int(tzstr[(4 if tzstr[3:4] == self._TIME_SEP else 3):])
if zero_as_utc and hours == 0 and minutes == 0:
return tz.tzutc()
else:
if minutes > 59:
raise ValueError('Invalid minutes in time zone offset')
if hours > 23:
raise ValueError('Invalid hours in time zone offset')
return tz.tzoffset(None, mult * (hours * 60 + minutes) * 60)
DEFAULT_ISOPARSER = isoparser()
isoparse = DEFAULT_ISOPARSER.isoparse
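
A minimal usage sketch of the module-level entry point defined above, assuming python-dateutil 2.7.5 (editor's illustration):

>>> from dateutil.parser import isoparse
>>> isoparse("2018-12-15T00:55:30")
datetime.datetime(2018, 12, 15, 0, 55, 30)
>>> isoparse("2018-W50-6")                  # ISO week date: Saturday of week 50
datetime.datetime(2018, 12, 15, 0, 0)
>>> isoparse("2018-12-15T00:55:30+00:00")   # zero offsets normalize to tzutc()
datetime.datetime(2018, 12, 15, 0, 55, 30, tzinfo=tzutc())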

libs/dateutil/relativedelta.py
@@ -1,109 +1,96 @@
"""
Copyright (c) 2003-2010 Gustavo Niemeyer <gustavo@niemeyer.net>
This module offers extensions to the standard python 2.3+
datetime module.
"""
__author__ = "Gustavo Niemeyer <gustavo@niemeyer.net>"
__license__ = "PSF License"
# -*- coding: utf-8 -*-
import datetime
import calendar
import operator
from math import copysign
from six import integer_types
from warnings import warn
from ._common import weekday
MO, TU, WE, TH, FR, SA, SU = weekdays = tuple(weekday(x) for x in range(7))
__all__ = ["relativedelta", "MO", "TU", "WE", "TH", "FR", "SA", "SU"]
class weekday(object):
__slots__ = ["weekday", "n"]
def __init__(self, weekday, n=None):
self.weekday = weekday
self.n = n
def __call__(self, n):
if n == self.n:
return self
else:
return self.__class__(self.weekday, n)
def __eq__(self, other):
try:
if self.weekday != other.weekday or self.n != other.n:
return False
except AttributeError:
return False
return True
def __repr__(self):
s = ("MO", "TU", "WE", "TH", "FR", "SA", "SU")[self.weekday]
if not self.n:
return s
else:
return "%s(%+d)" % (s, self.n)
MO, TU, WE, TH, FR, SA, SU = weekdays = tuple([weekday(x) for x in range(7)])
class relativedelta:
class relativedelta(object):
"""
The relativedelta type is based on the specification of the excelent
work done by M.-A. Lemburg in his mx.DateTime extension. However,
notice that this type does *NOT* implement the same algorithm as
his work. Do *NOT* expect it to behave like mx.DateTime's counterpart.
The relativedelta type is based on the specification of the excellent
work done by M.-A. Lemburg in his
`mx.DateTime <https://www.egenix.com/products/python/mxBase/mxDateTime/>`_ extension.
However, notice that this type does *NOT* implement the same algorithm as
his work. Do *NOT* expect it to behave like mx.DateTime's counterpart.
There's two different ways to build a relativedelta instance. The
first one is passing it two date/datetime classes:
There are two different ways to build a relativedelta instance. The
first one is passing it two date/datetime classes::
relativedelta(datetime1, datetime2)
relativedelta(datetime1, datetime2)
And the other way is to use the following keyword arguments:
The second one is passing it any number of the following keyword arguments::
year, month, day, hour, minute, second, microsecond:
Absolute information.
relativedelta(arg1=x,arg2=y,arg3=z...)
years, months, weeks, days, hours, minutes, seconds, microseconds:
Relative information, may be negative.
year, month, day, hour, minute, second, microsecond:
Absolute information (argument is singular); adding or subtracting a
relativedelta with absolute information does not perform an arithmetic
operation, but rather REPLACES the corresponding value in the
original datetime with the value(s) in relativedelta.
weekday:
One of the weekday instances (MO, TU, etc). These instances may
receive a parameter N, specifying the Nth weekday, which could
be positive or negative (like MO(+1) or MO(-2)). Not specifying
it is the same as specifying +1. You can also use an integer,
where 0=MO.
years, months, weeks, days, hours, minutes, seconds, microseconds:
Relative information, may be negative (argument is plural); adding
or subtracting a relativedelta with relative information performs
the corresponding arithmetic operation on the original datetime value
with the information in the relativedelta.
leapdays:
Will add given days to the date found, if year is a leap
year, and the date found is post 28 of february.
weekday:
One of the weekday instances (MO, TU, etc). These
instances may receive a parameter N, specifying the Nth
weekday, which could be positive or negative (like MO(+1)
or MO(-2). Not specifying it is the same as specifying
+1. You can also use an integer, where 0=MO. Notice that
if the calculated date is already Monday, for example,
using MO(1) or MO(-1) won't change the day.
yearday, nlyearday:
Set the yearday or the non-leap year day (jump leap days).
These are converted to day/month/leapdays information.
leapdays:
Will add given days to the date found, if year is a leap
year, and the date found is post 28 of february.
Here is the behavior of operations with relativedelta:
yearday, nlyearday:
Set the yearday or the non-leap year day (jump leap days).
These are converted to day/month/leapdays information.
1) Calculate the absolute year, using the 'year' argument, or the
original datetime year, if the argument is not present.
There are relative and absolute forms of the keyword
arguments. The plural is relative, and the singular is
absolute. For each argument in the order below, the absolute form
is applied first (by setting each attribute to that value) and
then the relative form (by adding the value to the attribute).
2) Add the relative 'years' argument to the absolute year.
The order of attributes considered when this relativedelta is
added to a datetime is:
3) Do steps 1 and 2 for month/months.
1. Year
2. Month
3. Day
4. Hours
5. Minutes
6. Seconds
7. Microseconds
4) Calculate the absolute day, using the 'day' argument, or the
original datetime day, if the argument is not present. Then,
subtract from the day until it fits in the year and month
found after their operations.
Finally, weekday is applied, using the rule described above.
5) Add the relative 'days' argument to the absolute day. Notice
that the 'weeks' argument is multiplied by 7 and added to
'days'.
For example
6) Do steps 1 and 2 for hour/hours, minute/minutes, second/seconds,
microsecond/microseconds.
>>> dt = datetime(2018, 4, 9, 13, 37, 0)
>>> delta = relativedelta(hours=25, day=1, weekday=MO(1))
>>> dt + delta
datetime(2018, 4, 2, 14, 37, 0)
First, the day is set to 1 (the first of the month), then 25 hours
are added, to get to the 2nd day and 14th hour, finally the
weekday is applied, but since the 2nd is already a Monday there is
no effect.
7) If the 'weekday' argument is present, calculate the weekday,
with the given (wday, nth) tuple. wday is the index of the
weekday (0-6, 0=Mon), and nth is the number of weeks to add
forward or backward, depending on its signal. Notice that if
the calculated date is already Monday, for example, using
(0, 1) or (0, -1) won't change the day.
"""
def __init__(self, dt1=None, dt2=None,
@@ -112,15 +99,22 @@ Here is the behavior of operations with relativedelta:
year=None, month=None, day=None, weekday=None,
yearday=None, nlyearday=None,
hour=None, minute=None, second=None, microsecond=None):
if dt1 and dt2:
if not isinstance(dt1, datetime.date) or \
not isinstance(dt2, datetime.date):
raise TypeError, "relativedelta only diffs datetime/date"
if type(dt1) is not type(dt2):
# datetime is a subclass of date. So both must be date
if not (isinstance(dt1, datetime.date) and
isinstance(dt2, datetime.date)):
raise TypeError("relativedelta only diffs datetime/date")
# We allow two dates, or two datetimes, so we coerce them to be
# of the same type
if (isinstance(dt1, datetime.datetime) !=
isinstance(dt2, datetime.datetime)):
if not isinstance(dt1, datetime.datetime):
dt1 = datetime.datetime.fromordinal(dt1.toordinal())
elif not isinstance(dt2, datetime.datetime):
dt2 = datetime.datetime.fromordinal(dt2.toordinal())
self.years = 0
self.months = 0
self.days = 0
@@ -139,31 +133,48 @@ Here is the behavior of operations with relativedelta:
self.microsecond = None
self._has_time = 0
months = (dt1.year*12+dt1.month)-(dt2.year*12+dt2.month)
# Get year / month delta between the two
months = (dt1.year - dt2.year) * 12 + (dt1.month - dt2.month)
self._set_months(months)
# Remove the year/month delta so the timedelta is just well-defined
# time units (seconds, days and microseconds)
dtm = self.__radd__(dt2)
# If we've overshot our target, make an adjustment
if dt1 < dt2:
while dt1 > dtm:
months += 1
self._set_months(months)
dtm = self.__radd__(dt2)
compare = operator.gt
increment = 1
else:
while dt1 < dtm:
months -= 1
self._set_months(months)
dtm = self.__radd__(dt2)
compare = operator.lt
increment = -1
while compare(dt1, dtm):
months += increment
self._set_months(months)
dtm = self.__radd__(dt2)
# Get the timedelta between the "months-adjusted" date and dt1
delta = dt1 - dtm
self.seconds = delta.seconds+delta.days*86400
self.seconds = delta.seconds + delta.days * 86400
self.microseconds = delta.microseconds
else:
self.years = years
self.months = months
self.days = days+weeks*7
# Check for non-integer values in integer-only quantities
if any(x is not None and x != int(x) for x in (years, months)):
raise ValueError("Non-integer years and months are "
"ambiguous and not currently supported.")
# Relative information
self.years = int(years)
self.months = int(months)
self.days = days + weeks * 7
self.leapdays = leapdays
self.hours = hours
self.minutes = minutes
self.seconds = seconds
self.microseconds = microseconds
# Absolute information
self.year = year
self.month = month
self.day = day
@@ -172,7 +183,15 @@ Here is the behavior of operations with relativedelta:
self.second = second
self.microsecond = microsecond
if type(weekday) is int:
if any(x is not None and int(x) != x
for x in (year, month, day, hour,
minute, second, microsecond)):
# For now we'll deprecate floats - later it'll be an error.
warn("Non-integer value passed as absolute information. " +
"This is not a well-defined condition and will raise " +
"errors in future versions.", DeprecationWarning)
if isinstance(weekday, integer_types):
self.weekday = weekdays[weekday]
else:
self.weekday = weekday
@@ -185,7 +204,8 @@ Here is the behavior of operations with relativedelta:
if yearday > 59:
self.leapdays = -1
if yday:
ydayidx = [31,59,90,120,151,181,212,243,273,304,334,366]
ydayidx = [31, 59, 90, 120, 151, 181, 212,
243, 273, 304, 334, 366]
for idx, ydays in enumerate(ydayidx):
if yday <= ydays:
self.month = idx+1
@@ -195,56 +215,143 @@ Here is the behavior of operations with relativedelta:
self.day = yday-ydayidx[idx-1]
break
else:
raise ValueError, "invalid year day (%d)" % yday
raise ValueError("invalid year day (%d)" % yday)
self._fix()
def _fix(self):
if abs(self.microseconds) > 999999:
s = self.microseconds//abs(self.microseconds)
div, mod = divmod(self.microseconds*s, 1000000)
self.microseconds = mod*s
self.seconds += div*s
s = _sign(self.microseconds)
div, mod = divmod(self.microseconds * s, 1000000)
self.microseconds = mod * s
self.seconds += div * s
if abs(self.seconds) > 59:
s = self.seconds//abs(self.seconds)
div, mod = divmod(self.seconds*s, 60)
self.seconds = mod*s
self.minutes += div*s
s = _sign(self.seconds)
div, mod = divmod(self.seconds * s, 60)
self.seconds = mod * s
self.minutes += div * s
if abs(self.minutes) > 59:
s = self.minutes//abs(self.minutes)
div, mod = divmod(self.minutes*s, 60)
self.minutes = mod*s
self.hours += div*s
s = _sign(self.minutes)
div, mod = divmod(self.minutes * s, 60)
self.minutes = mod * s
self.hours += div * s
if abs(self.hours) > 23:
s = self.hours//abs(self.hours)
div, mod = divmod(self.hours*s, 24)
self.hours = mod*s
self.days += div*s
s = _sign(self.hours)
div, mod = divmod(self.hours * s, 24)
self.hours = mod * s
self.days += div * s
if abs(self.months) > 11:
s = self.months//abs(self.months)
div, mod = divmod(self.months*s, 12)
self.months = mod*s
self.years += div*s
if (self.hours or self.minutes or self.seconds or self.microseconds or
self.hour is not None or self.minute is not None or
self.second is not None or self.microsecond is not None):
s = _sign(self.months)
div, mod = divmod(self.months * s, 12)
self.months = mod * s
self.years += div * s
if (self.hours or self.minutes or self.seconds or self.microseconds
or self.hour is not None or self.minute is not None or
self.second is not None or self.microsecond is not None):
self._has_time = 1
else:
self._has_time = 0
@property
def weeks(self):
return int(self.days / 7.0)
@weeks.setter
def weeks(self, value):
self.days = self.days - (self.weeks * 7) + value * 7
def _set_months(self, months):
self.months = months
if abs(self.months) > 11:
s = self.months//abs(self.months)
div, mod = divmod(self.months*s, 12)
self.months = mod*s
self.years = div*s
s = _sign(self.months)
div, mod = divmod(self.months * s, 12)
self.months = mod * s
self.years = div * s
else:
self.years = 0
def __radd__(self, other):
def normalized(self):
"""
Return a version of this object represented entirely using integer
values for the relative attributes.
>>> relativedelta(days=1.5, hours=2).normalized()
relativedelta(days=1, hours=14)
:return:
Returns a :class:`dateutil.relativedelta.relativedelta` object.
"""
# Cascade remainders down (rounding each to roughly nearest microsecond)
days = int(self.days)
hours_f = round(self.hours + 24 * (self.days - days), 11)
hours = int(hours_f)
minutes_f = round(self.minutes + 60 * (hours_f - hours), 10)
minutes = int(minutes_f)
seconds_f = round(self.seconds + 60 * (minutes_f - minutes), 8)
seconds = int(seconds_f)
microseconds = round(self.microseconds + 1e6 * (seconds_f - seconds))
# Constructor carries overflow back up with call to _fix()
return self.__class__(years=self.years, months=self.months,
days=days, hours=hours, minutes=minutes,
seconds=seconds, microseconds=microseconds,
leapdays=self.leapdays, year=self.year,
month=self.month, day=self.day,
weekday=self.weekday, hour=self.hour,
minute=self.minute, second=self.second,
microsecond=self.microsecond)
def __add__(self, other):
if isinstance(other, relativedelta):
return self.__class__(years=other.years + self.years,
months=other.months + self.months,
days=other.days + self.days,
hours=other.hours + self.hours,
minutes=other.minutes + self.minutes,
seconds=other.seconds + self.seconds,
microseconds=(other.microseconds +
self.microseconds),
leapdays=other.leapdays or self.leapdays,
year=(other.year if other.year is not None
else self.year),
month=(other.month if other.month is not None
else self.month),
day=(other.day if other.day is not None
else self.day),
weekday=(other.weekday if other.weekday is not None
else self.weekday),
hour=(other.hour if other.hour is not None
else self.hour),
minute=(other.minute if other.minute is not None
else self.minute),
second=(other.second if other.second is not None
else self.second),
microsecond=(other.microsecond if other.microsecond
is not None else
self.microsecond))
if isinstance(other, datetime.timedelta):
return self.__class__(years=self.years,
months=self.months,
days=self.days + other.days,
hours=self.hours,
minutes=self.minutes,
seconds=self.seconds + other.seconds,
microseconds=self.microseconds + other.microseconds,
leapdays=self.leapdays,
year=self.year,
month=self.month,
day=self.day,
weekday=self.weekday,
hour=self.hour,
minute=self.minute,
second=self.second,
microsecond=self.microsecond)
if not isinstance(other, datetime.date):
raise TypeError, "unsupported type for add operation"
return NotImplemented
elif self._has_time and not isinstance(other, datetime.datetime):
other = datetime.datetime.fromordinal(other.toordinal())
year = (self.year or other.year)+self.years
@@ -276,60 +383,70 @@ Here is the behavior of operations with relativedelta:
microseconds=self.microseconds))
if self.weekday:
weekday, nth = self.weekday.weekday, self.weekday.n or 1
jumpdays = (abs(nth)-1)*7
jumpdays = (abs(nth) - 1) * 7
if nth > 0:
jumpdays += (7-ret.weekday()+weekday)%7
jumpdays += (7 - ret.weekday() + weekday) % 7
else:
jumpdays += (ret.weekday()-weekday)%7
jumpdays += (ret.weekday() - weekday) % 7
jumpdays *= -1
ret += datetime.timedelta(days=jumpdays)
return ret
def __radd__(self, other):
return self.__add__(other)
def __rsub__(self, other):
return self.__neg__().__radd__(other)
def __add__(self, other):
if not isinstance(other, relativedelta):
raise TypeError, "unsupported type for add operation"
return relativedelta(years=other.years+self.years,
months=other.months+self.months,
days=other.days+self.days,
hours=other.hours+self.hours,
minutes=other.minutes+self.minutes,
seconds=other.seconds+self.seconds,
microseconds=other.microseconds+self.microseconds,
leapdays=other.leapdays or self.leapdays,
year=other.year or self.year,
month=other.month or self.month,
day=other.day or self.day,
weekday=other.weekday or self.weekday,
hour=other.hour or self.hour,
minute=other.minute or self.minute,
second=other.second or self.second,
microsecond=other.second or self.microsecond)
def __sub__(self, other):
if not isinstance(other, relativedelta):
raise TypeError, "unsupported type for sub operation"
return relativedelta(years=other.years-self.years,
months=other.months-self.months,
days=other.days-self.days,
hours=other.hours-self.hours,
minutes=other.minutes-self.minutes,
seconds=other.seconds-self.seconds,
microseconds=other.microseconds-self.microseconds,
leapdays=other.leapdays or self.leapdays,
year=other.year or self.year,
month=other.month or self.month,
day=other.day or self.day,
weekday=other.weekday or self.weekday,
hour=other.hour or self.hour,
minute=other.minute or self.minute,
second=other.second or self.second,
microsecond=other.second or self.microsecond)
return NotImplemented # In case the other object defines __rsub__
return self.__class__(years=self.years - other.years,
months=self.months - other.months,
days=self.days - other.days,
hours=self.hours - other.hours,
minutes=self.minutes - other.minutes,
seconds=self.seconds - other.seconds,
microseconds=self.microseconds - other.microseconds,
leapdays=self.leapdays or other.leapdays,
year=(self.year if self.year is not None
else other.year),
month=(self.month if self.month is not None else
other.month),
day=(self.day if self.day is not None else
other.day),
weekday=(self.weekday if self.weekday is not None else
other.weekday),
hour=(self.hour if self.hour is not None else
other.hour),
minute=(self.minute if self.minute is not None else
other.minute),
second=(self.second if self.second is not None else
other.second),
microsecond=(self.microsecond if self.microsecond
is not None else
other.microsecond))
def __abs__(self):
return self.__class__(years=abs(self.years),
months=abs(self.months),
days=abs(self.days),
hours=abs(self.hours),
minutes=abs(self.minutes),
seconds=abs(self.seconds),
microseconds=abs(self.microseconds),
leapdays=self.leapdays,
year=self.year,
month=self.month,
day=self.day,
weekday=self.weekday,
hour=self.hour,
minute=self.minute,
second=self.second,
microsecond=self.microsecond)
def __neg__(self):
return relativedelta(years=-self.years,
return self.__class__(years=-self.years,
months=-self.months,
days=-self.days,
hours=-self.hours,
@@ -346,7 +463,7 @@ Here is the behavior of operations with relativedelta:
second=self.second,
microsecond=self.microsecond)
def __nonzero__(self):
def __bool__(self):
return not (not self.years and
not self.months and
not self.days and
@@ -363,16 +480,22 @@ Here is the behavior of operations with relativedelta:
self.minute is None and
self.second is None and
self.microsecond is None)
# Compatibility with Python 2.x
__nonzero__ = __bool__
def __mul__(self, other):
f = float(other)
return relativedelta(years=self.years*f,
months=self.months*f,
days=self.days*f,
hours=self.hours*f,
minutes=self.minutes*f,
seconds=self.seconds*f,
microseconds=self.microseconds*f,
try:
f = float(other)
except TypeError:
return NotImplemented
return self.__class__(years=int(self.years * f),
months=int(self.months * f),
days=int(self.days * f),
hours=int(self.hours * f),
minutes=int(self.minutes * f),
seconds=int(self.seconds * f),
microseconds=int(self.microseconds * f),
leapdays=self.leapdays,
year=self.year,
month=self.month,
@@ -383,9 +506,11 @@ Here is the behavior of operations with relativedelta:
second=self.second,
microsecond=self.microsecond)
__rmul__ = __mul__
def __eq__(self, other):
if not isinstance(other, relativedelta):
return False
return NotImplemented
if self.weekday or other.weekday:
if not self.weekday or not other.weekday:
return False
@@ -400,6 +525,7 @@ Here is the behavior of operations with relativedelta:
self.hours == other.hours and
self.minutes == other.minutes and
self.seconds == other.seconds and
self.microseconds == other.microseconds and
self.leapdays == other.leapdays and
self.year == other.year and
self.month == other.month and
@@ -409,11 +535,38 @@ Here is the behavior of operations with relativedelta:
self.second == other.second and
self.microsecond == other.microsecond)
def __hash__(self):
return hash((
self.weekday,
self.years,
self.months,
self.days,
self.hours,
self.minutes,
self.seconds,
self.microseconds,
self.leapdays,
self.year,
self.month,
self.day,
self.hour,
self.minute,
self.second,
self.microsecond,
))
def __ne__(self, other):
return not self.__eq__(other)
def __div__(self, other):
return self.__mul__(1/float(other))
try:
reciprocal = 1 / float(other)
except TypeError:
return NotImplemented
return self.__mul__(reciprocal)
__truediv__ = __div__
def __repr__(self):
l = []
@@ -421,12 +574,17 @@ Here is the behavior of operations with relativedelta:
"hours", "minutes", "seconds", "microseconds"]:
value = getattr(self, attr)
if value:
l.append("%s=%+d" % (attr, value))
l.append("{attr}={value:+g}".format(attr=attr, value=value))
for attr in ["year", "month", "day", "weekday",
"hour", "minute", "second", "microsecond"]:
value = getattr(self, attr)
if value is not None:
l.append("%s=%s" % (attr, `value`))
return "%s(%s)" % (self.__class__.__name__, ", ".join(l))
l.append("{attr}={value}".format(attr=attr, value=repr(value)))
return "{classname}({attrs})".format(classname=self.__class__.__name__,
attrs=", ".join(l))
def _sign(x):
return int(copysign(1, x))
# vim:ts=4:sw=4:et
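
A brief sketch of the absolute-versus-relative semantics described in the docstring above, assuming python-dateutil 2.7.5 (editor's illustration):

>>> from datetime import datetime
>>> from dateutil.relativedelta import relativedelta, SU
>>> dt = datetime(2018, 12, 15, 0, 55, 30)
>>> dt + relativedelta(months=+2)        # plural argument: relative arithmetic
datetime.datetime(2019, 2, 15, 0, 55, 30)
>>> dt + relativedelta(month=2)          # singular argument: replaces the month
datetime.datetime(2018, 2, 15, 0, 55, 30)
>>> dt + relativedelta(weekday=SU(+1))   # Dec 15 2018 is a Saturday; next Sunday
datetime.datetime(2018, 12, 16, 0, 55, 30)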

File diff suppressed because it is too large.

libs/dateutil/tz.py
@@ -1,951 +0,0 @@
"""
Copyright (c) 2003-2007 Gustavo Niemeyer <gustavo@niemeyer.net>
This module offers extensions to the standard python 2.3+
datetime module.
"""
__author__ = "Gustavo Niemeyer <gustavo@niemeyer.net>"
__license__ = "PSF License"
import datetime
import struct
import time
import sys
import os
relativedelta = None
parser = None
rrule = None
__all__ = ["tzutc", "tzoffset", "tzlocal", "tzfile", "tzrange",
"tzstr", "tzical", "tzwin", "tzwinlocal", "gettz"]
try:
from dateutil.tzwin import tzwin, tzwinlocal
except (ImportError, OSError):
tzwin, tzwinlocal = None, None
ZERO = datetime.timedelta(0)
EPOCHORDINAL = datetime.datetime.utcfromtimestamp(0).toordinal()
class tzutc(datetime.tzinfo):
def utcoffset(self, dt):
return ZERO
def dst(self, dt):
return ZERO
def tzname(self, dt):
return "UTC"
def __eq__(self, other):
return (isinstance(other, tzutc) or
(isinstance(other, tzoffset) and other._offset == ZERO))
def __ne__(self, other):
return not self.__eq__(other)
def __repr__(self):
return "%s()" % self.__class__.__name__
__reduce__ = object.__reduce__
class tzoffset(datetime.tzinfo):
def __init__(self, name, offset):
self._name = name
self._offset = datetime.timedelta(seconds=offset)
def utcoffset(self, dt):
return self._offset
def dst(self, dt):
return ZERO
def tzname(self, dt):
return self._name
def __eq__(self, other):
return (isinstance(other, tzoffset) and
self._offset == other._offset)
def __ne__(self, other):
return not self.__eq__(other)
def __repr__(self):
return "%s(%s, %s)" % (self.__class__.__name__,
`self._name`,
self._offset.days*86400+self._offset.seconds)
__reduce__ = object.__reduce__
class tzlocal(datetime.tzinfo):
_std_offset = datetime.timedelta(seconds=-time.timezone)
if time.daylight:
_dst_offset = datetime.timedelta(seconds=-time.altzone)
else:
_dst_offset = _std_offset
def utcoffset(self, dt):
if self._isdst(dt):
return self._dst_offset
else:
return self._std_offset
def dst(self, dt):
if self._isdst(dt):
return self._dst_offset-self._std_offset
else:
return ZERO
def tzname(self, dt):
return time.tzname[self._isdst(dt)]
def _isdst(self, dt):
# We can't use mktime here. It is unstable when deciding if
# the hour near to a change is DST or not.
#
# timestamp = time.mktime((dt.year, dt.month, dt.day, dt.hour,
# dt.minute, dt.second, dt.weekday(), 0, -1))
# return time.localtime(timestamp).tm_isdst
#
# The code above yields the following result:
#
#>>> import tz, datetime
#>>> t = tz.tzlocal()
#>>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname()
#'BRDT'
#>>> datetime.datetime(2003,2,16,0,tzinfo=t).tzname()
#'BRST'
#>>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname()
#'BRST'
#>>> datetime.datetime(2003,2,15,22,tzinfo=t).tzname()
#'BRDT'
#>>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname()
#'BRDT'
#
# Here is a more stable implementation:
#
timestamp = ((dt.toordinal() - EPOCHORDINAL) * 86400
+ dt.hour * 3600
+ dt.minute * 60
+ dt.second)
return time.localtime(timestamp+time.timezone).tm_isdst
def __eq__(self, other):
if not isinstance(other, tzlocal):
return False
return (self._std_offset == other._std_offset and
self._dst_offset == other._dst_offset)
return True
def __ne__(self, other):
return not self.__eq__(other)
def __repr__(self):
return "%s()" % self.__class__.__name__
__reduce__ = object.__reduce__
class _ttinfo(object):
__slots__ = ["offset", "delta", "isdst", "abbr", "isstd", "isgmt"]
def __init__(self):
for attr in self.__slots__:
setattr(self, attr, None)
def __repr__(self):
l = []
for attr in self.__slots__:
value = getattr(self, attr)
if value is not None:
l.append("%s=%s" % (attr, `value`))
return "%s(%s)" % (self.__class__.__name__, ", ".join(l))
def __eq__(self, other):
if not isinstance(other, _ttinfo):
return False
return (self.offset == other.offset and
self.delta == other.delta and
self.isdst == other.isdst and
self.abbr == other.abbr and
self.isstd == other.isstd and
self.isgmt == other.isgmt)
def __ne__(self, other):
return not self.__eq__(other)
def __getstate__(self):
state = {}
for name in self.__slots__:
state[name] = getattr(self, name, None)
return state
def __setstate__(self, state):
for name in self.__slots__:
if name in state:
setattr(self, name, state[name])
class tzfile(datetime.tzinfo):
# http://www.twinsun.com/tz/tz-link.htm
# ftp://elsie.nci.nih.gov/pub/tz*.tar.gz
def __init__(self, fileobj):
if isinstance(fileobj, basestring):
self._filename = fileobj
fileobj = open(fileobj)
elif hasattr(fileobj, "name"):
self._filename = fileobj.name
else:
self._filename = `fileobj`
# From tzfile(5):
#
# The time zone information files used by tzset(3)
# begin with the magic characters "TZif" to identify
# them as time zone information files, followed by
# sixteen bytes reserved for future use, followed by
# six four-byte values of type long, written in a
# ``standard'' byte order (the high-order byte
# of the value is written first).
if fileobj.read(4) != "TZif":
raise ValueError, "magic not found"
fileobj.read(16)
(
# The number of UTC/local indicators stored in the file.
ttisgmtcnt,
# The number of standard/wall indicators stored in the file.
ttisstdcnt,
# The number of leap seconds for which data is
# stored in the file.
leapcnt,
# The number of "transition times" for which data
# is stored in the file.
timecnt,
# The number of "local time types" for which data
# is stored in the file (must not be zero).
typecnt,
# The number of characters of "time zone
# abbreviation strings" stored in the file.
charcnt,
) = struct.unpack(">6l", fileobj.read(24))
# The above header is followed by tzh_timecnt four-byte
# values of type long, sorted in ascending order.
# These values are written in ``standard'' byte order.
# Each is used as a transition time (as returned by
# time(2)) at which the rules for computing local time
# change.
if timecnt:
self._trans_list = struct.unpack(">%dl" % timecnt,
fileobj.read(timecnt*4))
else:
self._trans_list = []
# Next come tzh_timecnt one-byte values of type unsigned
# char; each one tells which of the different types of
# ``local time'' types described in the file is associated
# with the same-indexed transition time. These values
# serve as indices into an array of ttinfo structures that
# appears next in the file.
if timecnt:
self._trans_idx = struct.unpack(">%dB" % timecnt,
fileobj.read(timecnt))
else:
self._trans_idx = []
# Each ttinfo structure is written as a four-byte value
# for tt_gmtoff of type long, in a standard byte
# order, followed by a one-byte value for tt_isdst
# and a one-byte value for tt_abbrind. In each
# structure, tt_gmtoff gives the number of
# seconds to be added to UTC, tt_isdst tells whether
# tm_isdst should be set by localtime(3), and
# tt_abbrind serves as an index into the array of
# time zone abbreviation characters that follow the
# ttinfo structure(s) in the file.
ttinfo = []
for i in range(typecnt):
ttinfo.append(struct.unpack(">lbb", fileobj.read(6)))
abbr = fileobj.read(charcnt)
# Then there are tzh_leapcnt pairs of four-byte
# values, written in standard byte order; the
# first value of each pair gives the time (as
# returned by time(2)) at which a leap second
# occurs; the second gives the total number of
# leap seconds to be applied after the given time.
# The pairs of values are sorted in ascending order
# by time.
# Not used, for now
if leapcnt:
leap = struct.unpack(">%dl" % (leapcnt*2),
fileobj.read(leapcnt*8))
# Then there are tzh_ttisstdcnt standard/wall
# indicators, each stored as a one-byte value;
# they tell whether the transition times associated
# with local time types were specified as standard
# time or wall clock time, and are used when
# a time zone file is used in handling POSIX-style
# time zone environment variables.
if ttisstdcnt:
isstd = struct.unpack(">%db" % ttisstdcnt,
fileobj.read(ttisstdcnt))
# Finally, there are tzh_ttisgmtcnt UTC/local
# indicators, each stored as a one-byte value;
# they tell whether the transition times associated
# with local time types were specified as UTC or
# local time, and are used when a time zone file
# is used in handling POSIX-style time zone envi-
# ronment variables.
if ttisgmtcnt:
isgmt = struct.unpack(">%db" % ttisgmtcnt,
fileobj.read(ttisgmtcnt))
# ** Everything has been read **
# Build ttinfo list
self._ttinfo_list = []
for i in range(typecnt):
gmtoff, isdst, abbrind = ttinfo[i]
# Round to full-minutes if that's not the case. Python's
# datetime doesn't accept sub-minute timezones. Check
# http://python.org/sf/1447945 for some information.
gmtoff = (gmtoff+30)//60*60
tti = _ttinfo()
tti.offset = gmtoff
tti.delta = datetime.timedelta(seconds=gmtoff)
tti.isdst = isdst
tti.abbr = abbr[abbrind:abbr.find('\x00', abbrind)]
tti.isstd = (ttisstdcnt > i and isstd[i] != 0)
tti.isgmt = (ttisgmtcnt > i and isgmt[i] != 0)
self._ttinfo_list.append(tti)
# Replace ttinfo indexes for ttinfo objects.
trans_idx = []
for idx in self._trans_idx:
trans_idx.append(self._ttinfo_list[idx])
self._trans_idx = tuple(trans_idx)
# Set standard, dst, and before ttinfos. before will be
# used when a given time is before any transitions,
# and will be set to the first non-dst ttinfo, or to
# the first dst, if all of them are dst.
self._ttinfo_std = None
self._ttinfo_dst = None
self._ttinfo_before = None
if self._ttinfo_list:
if not self._trans_list:
self._ttinfo_std = self._ttinfo_first = self._ttinfo_list[0]
else:
for i in range(timecnt-1,-1,-1):
tti = self._trans_idx[i]
if not self._ttinfo_std and not tti.isdst:
self._ttinfo_std = tti
elif not self._ttinfo_dst and tti.isdst:
self._ttinfo_dst = tti
if self._ttinfo_std and self._ttinfo_dst:
break
else:
if self._ttinfo_dst and not self._ttinfo_std:
self._ttinfo_std = self._ttinfo_dst
for tti in self._ttinfo_list:
if not tti.isdst:
self._ttinfo_before = tti
break
else:
self._ttinfo_before = self._ttinfo_list[0]
# Now fix transition times to become relative to wall time.
#
# I'm not sure about this. In my tests, the tz source file
# is setup to wall time, and in the binary file isstd and
# isgmt are off, so it should be in wall time. OTOH, it's
# always in gmt time. Let me know if you have comments
# about this.
laststdoffset = 0
self._trans_list = list(self._trans_list)
for i in range(len(self._trans_list)):
tti = self._trans_idx[i]
if not tti.isdst:
# This is std time.
self._trans_list[i] += tti.offset
laststdoffset = tti.offset
else:
# This is dst time. Convert to std.
self._trans_list[i] += laststdoffset
self._trans_list = tuple(self._trans_list)
def _find_ttinfo(self, dt, laststd=0):
timestamp = ((dt.toordinal() - EPOCHORDINAL) * 86400
+ dt.hour * 3600
+ dt.minute * 60
+ dt.second)
idx = 0
for trans in self._trans_list:
if timestamp < trans:
break
idx += 1
else:
return self._ttinfo_std
if idx == 0:
return self._ttinfo_before
if laststd:
while idx > 0:
tti = self._trans_idx[idx-1]
if not tti.isdst:
return tti
idx -= 1
else:
return self._ttinfo_std
else:
return self._trans_idx[idx-1]
def utcoffset(self, dt):
if not self._ttinfo_std:
return ZERO
return self._find_ttinfo(dt).delta
def dst(self, dt):
if not self._ttinfo_dst:
return ZERO
tti = self._find_ttinfo(dt)
if not tti.isdst:
return ZERO
# The documentation says that utcoffset()-dst() must
# be constant for every dt.
return tti.delta-self._find_ttinfo(dt, laststd=1).delta
# An alternative for that would be:
#
# return self._ttinfo_dst.offset-self._ttinfo_std.offset
#
# However, this class stores historical changes in the
# dst offset, so I belive that this wouldn't be the right
# way to implement this.
def tzname(self, dt):
if not self._ttinfo_std:
return None
return self._find_ttinfo(dt).abbr
def __eq__(self, other):
if not isinstance(other, tzfile):
return False
return (self._trans_list == other._trans_list and
self._trans_idx == other._trans_idx and
self._ttinfo_list == other._ttinfo_list)
def __ne__(self, other):
return not self.__eq__(other)
def __repr__(self):
return "%s(%s)" % (self.__class__.__name__, `self._filename`)
def __reduce__(self):
if not os.path.isfile(self._filename):
raise ValueError, "Unpickable %s class" % self.__class__.__name__
return (self.__class__, (self._filename,))
class tzrange(datetime.tzinfo):
def __init__(self, stdabbr, stdoffset=None,
dstabbr=None, dstoffset=None,
start=None, end=None):
global relativedelta
if not relativedelta:
from dateutil import relativedelta
self._std_abbr = stdabbr
self._dst_abbr = dstabbr
if stdoffset is not None:
self._std_offset = datetime.timedelta(seconds=stdoffset)
else:
self._std_offset = ZERO
if dstoffset is not None:
self._dst_offset = datetime.timedelta(seconds=dstoffset)
elif dstabbr and stdoffset is not None:
self._dst_offset = self._std_offset+datetime.timedelta(hours=+1)
else:
self._dst_offset = ZERO
if dstabbr and start is None:
self._start_delta = relativedelta.relativedelta(
hours=+2, month=4, day=1, weekday=relativedelta.SU(+1))
else:
self._start_delta = start
if dstabbr and end is None:
self._end_delta = relativedelta.relativedelta(
hours=+1, month=10, day=31, weekday=relativedelta.SU(-1))
else:
self._end_delta = end
def utcoffset(self, dt):
if self._isdst(dt):
return self._dst_offset
else:
return self._std_offset
def dst(self, dt):
if self._isdst(dt):
return self._dst_offset-self._std_offset
else:
return ZERO
def tzname(self, dt):
if self._isdst(dt):
return self._dst_abbr
else:
return self._std_abbr
def _isdst(self, dt):
if not self._start_delta:
return False
year = datetime.datetime(dt.year,1,1)
start = year+self._start_delta
end = year+self._end_delta
dt = dt.replace(tzinfo=None)
if start < end:
return dt >= start and dt < end
else:
return dt >= start or dt < end
def __eq__(self, other):
if not isinstance(other, tzrange):
return False
return (self._std_abbr == other._std_abbr and
self._dst_abbr == other._dst_abbr and
self._std_offset == other._std_offset and
self._dst_offset == other._dst_offset and
self._start_delta == other._start_delta and
self._end_delta == other._end_delta)
def __ne__(self, other):
return not self.__eq__(other)
def __repr__(self):
return "%s(...)" % self.__class__.__name__
__reduce__ = object.__reduce__
class tzstr(tzrange):
def __init__(self, s):
global parser
if not parser:
from dateutil import parser
self._s = s
res = parser._parsetz(s)
if res is None:
raise ValueError, "unknown string format"
# Here we break the compatibility with the TZ variable handling.
# GMT-3 actually *means* the timezone -3.
if res.stdabbr in ("GMT", "UTC"):
res.stdoffset *= -1
# We must initialize it first, since _delta() needs
# _std_offset and _dst_offset set. Use False in start/end
# to avoid building it two times.
tzrange.__init__(self, res.stdabbr, res.stdoffset,
res.dstabbr, res.dstoffset,
start=False, end=False)
if not res.dstabbr:
self._start_delta = None
self._end_delta = None
else:
self._start_delta = self._delta(res.start)
if self._start_delta:
self._end_delta = self._delta(res.end, isend=1)
def _delta(self, x, isend=0):
kwargs = {}
if x.month is not None:
kwargs["month"] = x.month
if x.weekday is not None:
kwargs["weekday"] = relativedelta.weekday(x.weekday, x.week)
if x.week > 0:
kwargs["day"] = 1
else:
kwargs["day"] = 31
elif x.day:
kwargs["day"] = x.day
elif x.yday is not None:
kwargs["yearday"] = x.yday
elif x.jyday is not None:
kwargs["nlyearday"] = x.jyday
if not kwargs:
# Default is to start on first sunday of april, and end
# on last sunday of october.
if not isend:
kwargs["month"] = 4
kwargs["day"] = 1
kwargs["weekday"] = relativedelta.SU(+1)
else:
kwargs["month"] = 10
kwargs["day"] = 31
kwargs["weekday"] = relativedelta.SU(-1)
if x.time is not None:
kwargs["seconds"] = x.time
else:
# Default is 2AM.
kwargs["seconds"] = 7200
if isend:
# Convert to standard time, to follow the documented way
# of working with the extra hour. See the documentation
# of the tzinfo class.
delta = self._dst_offset-self._std_offset
kwargs["seconds"] -= delta.seconds+delta.days*86400
return relativedelta.relativedelta(**kwargs)
def __repr__(self):
return "%s(%s)" % (self.__class__.__name__, `self._s`)
class _tzicalvtzcomp:
def __init__(self, tzoffsetfrom, tzoffsetto, isdst,
tzname=None, rrule=None):
self.tzoffsetfrom = datetime.timedelta(seconds=tzoffsetfrom)
self.tzoffsetto = datetime.timedelta(seconds=tzoffsetto)
self.tzoffsetdiff = self.tzoffsetto-self.tzoffsetfrom
self.isdst = isdst
self.tzname = tzname
self.rrule = rrule
class _tzicalvtz(datetime.tzinfo):
def __init__(self, tzid, comps=[]):
self._tzid = tzid
self._comps = comps
self._cachedate = []
self._cachecomp = []
def _find_comp(self, dt):
if len(self._comps) == 1:
return self._comps[0]
dt = dt.replace(tzinfo=None)
try:
return self._cachecomp[self._cachedate.index(dt)]
except ValueError:
pass
lastcomp = None
lastcompdt = None
for comp in self._comps:
if not comp.isdst:
# Handle the extra hour in DST -> STD
compdt = comp.rrule.before(dt-comp.tzoffsetdiff, inc=True)
else:
compdt = comp.rrule.before(dt, inc=True)
if compdt and (not lastcompdt or lastcompdt < compdt):
lastcompdt = compdt
lastcomp = comp
if not lastcomp:
# RFC says nothing about what to do when a given
# time is before the first onset date. We'll look for the
# first standard component, or the first component, if
# none is found.
for comp in self._comps:
if not comp.isdst:
lastcomp = comp
break
else:
lastcomp = comp[0]
self._cachedate.insert(0, dt)
self._cachecomp.insert(0, lastcomp)
if len(self._cachedate) > 10:
self._cachedate.pop()
self._cachecomp.pop()
return lastcomp
def utcoffset(self, dt):
return self._find_comp(dt).tzoffsetto
def dst(self, dt):
comp = self._find_comp(dt)
if comp.isdst:
return comp.tzoffsetdiff
else:
return ZERO
def tzname(self, dt):
return self._find_comp(dt).tzname
def __repr__(self):
return "<tzicalvtz %s>" % `self._tzid`
__reduce__ = object.__reduce__
class tzical:
def __init__(self, fileobj):
global rrule
if not rrule:
from dateutil import rrule
if isinstance(fileobj, basestring):
self._s = fileobj
fileobj = open(fileobj)
elif hasattr(fileobj, "name"):
self._s = fileobj.name
else:
self._s = `fileobj`
self._vtz = {}
self._parse_rfc(fileobj.read())
def keys(self):
return self._vtz.keys()
def get(self, tzid=None):
if tzid is None:
keys = self._vtz.keys()
if len(keys) == 0:
raise ValueError, "no timezones defined"
elif len(keys) > 1:
raise ValueError, "more than one timezone available"
tzid = keys[0]
return self._vtz.get(tzid)
def _parse_offset(self, s):
s = s.strip()
if not s:
raise ValueError, "empty offset"
if s[0] in ('+', '-'):
signal = (-1,+1)[s[0]=='+']
s = s[1:]
else:
signal = +1
if len(s) == 4:
return (int(s[:2])*3600+int(s[2:])*60)*signal
elif len(s) == 6:
return (int(s[:2])*3600+int(s[2:4])*60+int(s[4:]))*signal
else:
raise ValueError, "invalid offset: "+s
def _parse_rfc(self, s):
lines = s.splitlines()
if not lines:
raise ValueError, "empty string"
# Unfold
i = 0
while i < len(lines):
line = lines[i].rstrip()
if not line:
del lines[i]
elif i > 0 and line[0] == " ":
lines[i-1] += line[1:]
del lines[i]
else:
i += 1
tzid = None
comps = []
invtz = False
comptype = None
for line in lines:
if not line:
continue
name, value = line.split(':', 1)
parms = name.split(';')
if not parms:
raise ValueError, "empty property name"
name = parms[0].upper()
parms = parms[1:]
if invtz:
if name == "BEGIN":
if value in ("STANDARD", "DAYLIGHT"):
# Process component
pass
else:
raise ValueError, "unknown component: "+value
comptype = value
founddtstart = False
tzoffsetfrom = None
tzoffsetto = None
rrulelines = []
tzname = None
elif name == "END":
if value == "VTIMEZONE":
if comptype:
raise ValueError, \
"component not closed: "+comptype
if not tzid:
raise ValueError, \
"mandatory TZID not found"
if not comps:
raise ValueError, \
"at least one component is needed"
# Process vtimezone
self._vtz[tzid] = _tzicalvtz(tzid, comps)
invtz = False
elif value == comptype:
if not founddtstart:
raise ValueError, \
"mandatory DTSTART not found"
if tzoffsetfrom is None:
raise ValueError, \
"mandatory TZOFFSETFROM not found"
if tzoffsetto is None:
raise ValueError, \
"mandatory TZOFFSETFROM not found"
# Process component
rr = None
if rrulelines:
rr = rrule.rrulestr("\n".join(rrulelines),
compatible=True,
ignoretz=True,
cache=True)
comp = _tzicalvtzcomp(tzoffsetfrom, tzoffsetto,
(comptype == "DAYLIGHT"),
tzname, rr)
comps.append(comp)
comptype = None
else:
raise ValueError, \
"invalid component end: "+value
elif comptype:
if name == "DTSTART":
rrulelines.append(line)
founddtstart = True
elif name in ("RRULE", "RDATE", "EXRULE", "EXDATE"):
rrulelines.append(line)
elif name == "TZOFFSETFROM":
if parms:
raise ValueError, \
"unsupported %s parm: %s "%(name, parms[0])
tzoffsetfrom = self._parse_offset(value)
elif name == "TZOFFSETTO":
if parms:
raise ValueError, \
"unsupported TZOFFSETTO parm: "+parms[0]
tzoffsetto = self._parse_offset(value)
elif name == "TZNAME":
if parms:
raise ValueError, \
"unsupported TZNAME parm: "+parms[0]
tzname = value
elif name == "COMMENT":
pass
else:
raise ValueError, "unsupported property: "+name
else:
if name == "TZID":
if parms:
raise ValueError, \
"unsupported TZID parm: "+parms[0]
tzid = value
elif name in ("TZURL", "LAST-MODIFIED", "COMMENT"):
pass
else:
raise ValueError, "unsupported property: "+name
elif name == "BEGIN" and value == "VTIMEZONE":
tzid = None
comps = []
invtz = True
def __repr__(self):
return "%s(%s)" % (self.__class__.__name__, `self._s`)
if sys.platform != "win32":
TZFILES = ["/etc/localtime", "localtime"]
TZPATHS = ["/usr/share/zoneinfo", "/usr/lib/zoneinfo", "/etc/zoneinfo"]
else:
TZFILES = []
TZPATHS = []
def gettz(name=None):
tz = None
if not name:
try:
name = os.environ["TZ"]
except KeyError:
pass
if name is None or name == ":":
for filepath in TZFILES:
if not os.path.isabs(filepath):
filename = filepath
for path in TZPATHS:
filepath = os.path.join(path, filename)
if os.path.isfile(filepath):
break
else:
continue
if os.path.isfile(filepath):
try:
tz = tzfile(filepath)
break
except (IOError, OSError, ValueError):
pass
else:
tz = tzlocal()
else:
if name.startswith(":"):
name = name[:-1]
if os.path.isabs(name):
if os.path.isfile(name):
tz = tzfile(name)
else:
tz = None
else:
for path in TZPATHS:
filepath = os.path.join(path, name)
if not os.path.isfile(filepath):
filepath = filepath.replace(' ','_')
if not os.path.isfile(filepath):
continue
try:
tz = tzfile(filepath)
break
except (IOError, OSError, ValueError):
pass
else:
tz = None
if tzwin:
try:
tz = tzwin(name)
except OSError:
pass
if not tz:
from dateutil.zoneinfo import gettz
tz = gettz(name)
if not tz:
for c in name:
# name must have at least one offset to be a tzstr
if c in "0123456789":
try:
tz = tzstr(name)
except ValueError:
pass
break
else:
if name in ("GMT", "UTC"):
tz = tzutc()
elif name in time.tzname:
tz = tzlocal()
return tz
# vim:ts=4:sw=4:et


@ -0,0 +1,17 @@
# -*- coding: utf-8 -*-
from .tz import *
from .tz import __doc__
#: Convenience constant providing a :class:`tzutc()` instance
#:
#: .. versionadded:: 2.7.0
UTC = tzutc()
__all__ = ["tzutc", "tzoffset", "tzlocal", "tzfile", "tzrange",
"tzstr", "tzical", "tzwin", "tzwinlocal", "gettz",
"enfold", "datetime_ambiguous", "datetime_exists",
"resolve_imaginary", "UTC", "DeprecatedTzFormatWarning"]
class DeprecatedTzFormatWarning(Warning):
"""Warning raised when time zones are parsed from deprecated formats."""

415
libs/dateutil/tz/_common.py Normal file

@ -0,0 +1,415 @@
from six import PY3
from functools import wraps
from datetime import datetime, timedelta, tzinfo
ZERO = timedelta(0)
__all__ = ['tzname_in_python2', 'enfold']
def tzname_in_python2(namefunc):
"""Change unicode output into bytestrings in Python 2
tzname() API changed in Python 3. It used to return bytes, but was changed
to unicode strings
"""
def adjust_encoding(*args, **kwargs):
name = namefunc(*args, **kwargs)
if name is not None and not PY3:
name = name.encode()
return name
return adjust_encoding
# The following is adapted from Alexander Belopolsky's tz library
# https://github.com/abalkin/tz
if hasattr(datetime, 'fold'):
# This is the Python 3.6+ situation, where datetime already has fold
def enfold(dt, fold=1):
"""
Provides a unified interface for assigning the ``fold`` attribute to
datetimes both before and after the implementation of PEP-495.
:param fold:
The value for the ``fold`` attribute in the returned datetime. This
should be either 0 or 1.
:return:
Returns an object for which ``getattr(dt, 'fold', 0)`` returns
``fold`` for all versions of Python. In versions prior to
Python 3.6, this is a ``_DatetimeWithFold`` object, which is a
subclass of :py:class:`datetime.datetime` with the ``fold``
attribute added, if ``fold`` is 1.
.. versionadded:: 2.6.0
"""
return dt.replace(fold=fold)
else:
class _DatetimeWithFold(datetime):
"""
This is a class designed to provide a PEP 495-compliant interface for
Python versions before 3.6. It is used only for dates in a fold, so
the ``fold`` attribute is fixed at ``1``.
.. versionadded:: 2.6.0
"""
__slots__ = ()
def replace(self, *args, **kwargs):
"""
Return a datetime with the same attributes, except for those
attributes given new values by whichever keyword arguments are
specified. Note that tzinfo=None can be specified to create a naive
datetime from an aware datetime with no conversion of date and time
data.
This is reimplemented in ``_DatetimeWithFold`` because pypy3 will
return a ``datetime.datetime`` even if ``fold`` is unchanged.
"""
argnames = (
'year', 'month', 'day', 'hour', 'minute', 'second',
'microsecond', 'tzinfo'
)
for arg, argname in zip(args, argnames):
if argname in kwargs:
raise TypeError('Duplicate argument: {}'.format(argname))
kwargs[argname] = arg
for argname in argnames:
if argname not in kwargs:
kwargs[argname] = getattr(self, argname)
dt_class = self.__class__ if kwargs.get('fold', 1) else datetime
return dt_class(**kwargs)
@property
def fold(self):
return 1
def enfold(dt, fold=1):
"""
Provides a unified interface for assigning the ``fold`` attribute to
datetimes both before and after the implementation of PEP-495.
:param fold:
The value for the ``fold`` attribute in the returned datetime. This
should be either 0 or 1.
:return:
Returns an object for which ``getattr(dt, 'fold', 0)`` returns
``fold`` for all versions of Python. In versions prior to
Python 3.6, this is a ``_DatetimeWithFold`` object, which is a
subclass of :py:class:`datetime.datetime` with the ``fold``
attribute added, if ``fold`` is 1.
.. versionadded:: 2.6.0
"""
if getattr(dt, 'fold', 0) == fold:
return dt
args = dt.timetuple()[:6]
args += (dt.microsecond, dt.tzinfo)
if fold:
return _DatetimeWithFold(*args)
else:
return datetime(*args)
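A hypothetical sketch of ``enfold`` resolving an ambiguous wall time (assumes the system zoneinfo provides America/New_York; not part of the diff):

from datetime import datetime
from dateutil import tz

# 01:30 on 2018-11-04 occurs twice in US Eastern time; fold picks which.
eastern = tz.gettz('America/New_York')
ambiguous = datetime(2018, 11, 4, 1, 30, tzinfo=eastern)
first = tz.enfold(ambiguous, fold=0)   # the earlier instant, EDT (UTC-04:00)
second = tz.enfold(ambiguous, fold=1)  # the later instant, EST (UTC-05:00)
print(first.utcoffset(), second.utcoffset())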
def _validate_fromutc_inputs(f):
"""
The CPython version of ``fromutc`` checks that the input is a ``datetime``
object and that ``self`` is attached as its ``tzinfo``.
"""
@wraps(f)
def fromutc(self, dt):
if not isinstance(dt, datetime):
raise TypeError("fromutc() requires a datetime argument")
if dt.tzinfo is not self:
raise ValueError("dt.tzinfo is not self")
return f(self, dt)
return fromutc
class _tzinfo(tzinfo):
"""
Base class for all ``dateutil`` ``tzinfo`` objects.
"""
def is_ambiguous(self, dt):
"""
Whether or not the "wall time" of a given datetime is ambiguous in this
zone.
:param dt:
A :py:class:`datetime.datetime`, naive or time zone aware.
:return:
Returns ``True`` if ambiguous, ``False`` otherwise.
.. versionadded:: 2.6.0
"""
dt = dt.replace(tzinfo=self)
wall_0 = enfold(dt, fold=0)
wall_1 = enfold(dt, fold=1)
same_offset = wall_0.utcoffset() == wall_1.utcoffset()
same_dt = wall_0.replace(tzinfo=None) == wall_1.replace(tzinfo=None)
return same_dt and not same_offset
def _fold_status(self, dt_utc, dt_wall):
"""
Determine the fold status of a "wall" datetime, given a representation
of the same datetime as a (naive) UTC datetime. This is calculated based
on the assumption that ``dt.utcoffset() - dt.dst()`` is constant for all
datetimes, and that this offset is the actual number of hours separating
``dt_utc`` and ``dt_wall``.
:param dt_utc:
Representation of the datetime as UTC
:param dt_wall:
Representation of the datetime as "wall time". This parameter must
either have a `fold` attribute or have a fold-naive
:class:`datetime.tzinfo` attached, otherwise the calculation may
fail.
"""
if self.is_ambiguous(dt_wall):
delta_wall = dt_wall - dt_utc
_fold = int(delta_wall == (dt_utc.utcoffset() - dt_utc.dst()))
else:
_fold = 0
return _fold
def _fold(self, dt):
return getattr(dt, 'fold', 0)
def _fromutc(self, dt):
"""
Given a timezone-aware datetime in a given timezone, calculates a
timezone-aware datetime in a new timezone.
Since this is the one time that we *know* we have an unambiguous
datetime object, we take this opportunity to determine whether the
datetime is ambiguous and in a "fold" state (e.g. if it's the first
occurrence, chronologically, of the ambiguous datetime).
:param dt:
A timezone-aware :class:`datetime.datetime` object.
"""
# Re-implement the algorithm from Python's datetime.py
dtoff = dt.utcoffset()
if dtoff is None:
raise ValueError("fromutc() requires a non-None utcoffset() "
"result")
# The original datetime.py code assumes that `dst()` defaults to
# zero during ambiguous times. PEP 495 inverts this presumption, so
# for pre-PEP 495 versions of python, we need to tweak the algorithm.
dtdst = dt.dst()
if dtdst is None:
raise ValueError("fromutc() requires a non-None dst() result")
delta = dtoff - dtdst
dt += delta
# Set fold=1 so we can default to being in the fold for
# ambiguous dates.
dtdst = enfold(dt, fold=1).dst()
if dtdst is None:
raise ValueError("fromutc(): dt.dst gave inconsistent "
"results; cannot convert")
return dt + dtdst
@_validate_fromutc_inputs
def fromutc(self, dt):
"""
Given a timezone-aware datetime in a given timezone, calculates a
timezone-aware datetime in a new timezone.
Since this is the one time that we *know* we have an unambiguous
datetime object, we take this opportunity to determine whether the
datetime is ambiguous and in a "fold" state (e.g. if it's the first
occurrence, chronologically, of the ambiguous datetime).
:param dt:
A timezone-aware :class:`datetime.datetime` object.
"""
dt_wall = self._fromutc(dt)
# Calculate the fold status given the two datetimes.
_fold = self._fold_status(dt, dt_wall)
# Set the default fold value for ambiguous dates
return enfold(dt_wall, fold=_fold)
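For illustration, a minimal check of the ambiguity detection described above (hypothetical usage; the zone name is assumed present in the system database):

from datetime import datetime
from dateutil import tz

nyc = tz.gettz('America/New_York')
# Both spellings answer the same question for the DST-off transition:
print(nyc.is_ambiguous(datetime(2018, 11, 4, 1, 30)))                # True
print(tz.datetime_ambiguous(datetime(2018, 11, 4, 1, 30), tz=nyc))   # True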
class tzrangebase(_tzinfo):
"""
This is an abstract base class for time zones represented by an annual
transition into and out of DST. Child classes should implement the following
methods:
* ``__init__(self, *args, **kwargs)``
* ``transitions(self, year)`` - this is expected to return a tuple of
datetimes representing the DST on and off transitions in standard
time.
A fully initialized ``tzrangebase`` subclass should also provide the
following attributes:
* ``hasdst``: Boolean whether or not the zone uses DST.
* ``_dst_offset`` / ``_std_offset``: :class:`datetime.timedelta` objects
representing the respective UTC offsets.
* ``_dst_abbr`` / ``_std_abbr``: Strings representing the timezone short
abbreviations in DST and STD, respectively.
* ``_hasdst``: Whether or not the zone has DST.
.. versionadded:: 2.6.0
"""
def __init__(self):
raise NotImplementedError('tzrangebase is an abstract base class')
def utcoffset(self, dt):
isdst = self._isdst(dt)
if isdst is None:
return None
elif isdst:
return self._dst_offset
else:
return self._std_offset
def dst(self, dt):
isdst = self._isdst(dt)
if isdst is None:
return None
elif isdst:
return self._dst_base_offset
else:
return ZERO
@tzname_in_python2
def tzname(self, dt):
if self._isdst(dt):
return self._dst_abbr
else:
return self._std_abbr
def fromutc(self, dt):
""" Given a datetime in UTC, return local time """
if not isinstance(dt, datetime):
raise TypeError("fromutc() requires a datetime argument")
if dt.tzinfo is not self:
raise ValueError("dt.tzinfo is not self")
# Get transitions - if there are none, fixed offset
transitions = self.transitions(dt.year)
if transitions is None:
return dt + self.utcoffset(dt)
# Get the transition times in UTC
dston, dstoff = transitions
dston -= self._std_offset
dstoff -= self._std_offset
utc_transitions = (dston, dstoff)
dt_utc = dt.replace(tzinfo=None)
isdst = self._naive_isdst(dt_utc, utc_transitions)
if isdst:
dt_wall = dt + self._dst_offset
else:
dt_wall = dt + self._std_offset
_fold = int(not isdst and self.is_ambiguous(dt_wall))
return enfold(dt_wall, fold=_fold)
def is_ambiguous(self, dt):
"""
Whether or not the "wall time" of a given datetime is ambiguous in this
zone.
:param dt:
A :py:class:`datetime.datetime`, naive or time zone aware.
:return:
Returns ``True`` if ambiguous, ``False`` otherwise.
.. versionadded:: 2.6.0
"""
if not self.hasdst:
return False
start, end = self.transitions(dt.year)
dt = dt.replace(tzinfo=None)
return (end <= dt < end + self._dst_base_offset)
def _isdst(self, dt):
if not self.hasdst:
return False
elif dt is None:
return None
transitions = self.transitions(dt.year)
if transitions is None:
return False
dt = dt.replace(tzinfo=None)
isdst = self._naive_isdst(dt, transitions)
# Handle ambiguous dates
if not isdst and self.is_ambiguous(dt):
return not self._fold(dt)
else:
return isdst
def _naive_isdst(self, dt, transitions):
dston, dstoff = transitions
dt = dt.replace(tzinfo=None)
if dston < dstoff:
isdst = dston <= dt < dstoff
else:
isdst = not dstoff <= dt < dston
return isdst
@property
def _dst_base_offset(self):
return self._dst_offset - self._std_offset
__hash__ = None
def __ne__(self, other):
return not (self == other)
def __repr__(self):
return "%s(...)" % self.__class__.__name__
__reduce__ = object.__reduce__


@ -0,0 +1,49 @@
from datetime import timedelta
class _TzSingleton(type):
def __init__(cls, *args, **kwargs):
cls.__instance = None
super(_TzSingleton, cls).__init__(*args, **kwargs)
def __call__(cls):
if cls.__instance is None:
cls.__instance = super(_TzSingleton, cls).__call__()
return cls.__instance
class _TzFactory(type):
def instance(cls, *args, **kwargs):
"""Alternate constructor that returns a fresh instance"""
return type.__call__(cls, *args, **kwargs)
class _TzOffsetFactory(_TzFactory):
def __init__(cls, *args, **kwargs):
cls.__instances = {}
def __call__(cls, name, offset):
if isinstance(offset, timedelta):
key = (name, offset.total_seconds())
else:
key = (name, offset)
instance = cls.__instances.get(key, None)
if instance is None:
instance = cls.__instances.setdefault(key,
cls.instance(name, offset))
return instance
class _TzStrFactory(_TzFactory):
def __init__(cls, *args, **kwargs):
cls.__instances = {}
def __call__(cls, s, posix_offset=False):
key = (s, posix_offset)
instance = cls.__instances.get(key, None)
if instance is None:
instance = cls.__instances.setdefault(key,
cls.instance(s, posix_offset))
return instance

1785
libs/dateutil/tz/tz.py Normal file

File diff suppressed because it is too large

331
libs/dateutil/tz/win.py Normal file

@ -0,0 +1,331 @@
# This code was originally contributed by Jeffrey Harris.
import datetime
import struct
from six.moves import winreg
from six import text_type
try:
import ctypes
from ctypes import wintypes
except ValueError:
# ValueError is raised on non-Windows systems for some horrible reason.
raise ImportError("Running tzwin on non-Windows system")
from ._common import tzrangebase
__all__ = ["tzwin", "tzwinlocal", "tzres"]
ONEWEEK = datetime.timedelta(7)
TZKEYNAMENT = r"SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time Zones"
TZKEYNAME9X = r"SOFTWARE\Microsoft\Windows\CurrentVersion\Time Zones"
TZLOCALKEYNAME = r"SYSTEM\CurrentControlSet\Control\TimeZoneInformation"
def _settzkeyname():
handle = winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE)
try:
winreg.OpenKey(handle, TZKEYNAMENT).Close()
TZKEYNAME = TZKEYNAMENT
except WindowsError:
TZKEYNAME = TZKEYNAME9X
handle.Close()
return TZKEYNAME
TZKEYNAME = _settzkeyname()
class tzres(object):
"""
Class for accessing `tzres.dll`, which contains timezone name related
resources.
.. versionadded:: 2.5.0
"""
p_wchar = ctypes.POINTER(wintypes.WCHAR) # Pointer to a wide char
def __init__(self, tzres_loc='tzres.dll'):
# Load the user32 DLL so we can load strings from tzres
user32 = ctypes.WinDLL('user32')
# Specify the LoadStringW function
user32.LoadStringW.argtypes = (wintypes.HINSTANCE,
wintypes.UINT,
wintypes.LPWSTR,
ctypes.c_int)
self.LoadStringW = user32.LoadStringW
self._tzres = ctypes.WinDLL(tzres_loc)
self.tzres_loc = tzres_loc
def load_name(self, offset):
"""
Load a timezone name from a DLL offset (integer).
>>> from dateutil.tzwin import tzres
>>> tzr = tzres()
>>> print(tzr.load_name(112))
'Eastern Standard Time'
:param offset:
A positive integer value referring to a string from the tzres dll.
.. note::
Offsets found in the registry are generally of the form
`@tzres.dll,-114`. The offset in this case is 114, not -114.
"""
resource = self.p_wchar()
lpBuffer = ctypes.cast(ctypes.byref(resource), wintypes.LPWSTR)
nchar = self.LoadStringW(self._tzres._handle, offset, lpBuffer, 0)
return resource[:nchar]
def name_from_string(self, tzname_str):
"""
Parse strings as returned from the Windows registry into the time zone
name as defined in the registry.
>>> from dateutil.tzwin import tzres
>>> tzr = tzres()
>>> print(tzr.name_from_string('@tzres.dll,-251'))
'Dateline Daylight Time'
>>> print(tzr.name_from_string('Eastern Standard Time'))
'Eastern Standard Time'
:param tzname_str:
A timezone name string as returned from a Windows registry key.
:return:
Returns the localized timezone string from tzres.dll if the string
is of the form `@tzres.dll,-offset`, else returns the input string.
"""
if not tzname_str.startswith('@'):
return tzname_str
name_splt = tzname_str.split(',-')
try:
offset = int(name_splt[1])
except:
raise ValueError("Malformed timezone string.")
return self.load_name(offset)
class tzwinbase(tzrangebase):
"""tzinfo class based on win32's timezones available in the registry."""
def __init__(self):
raise NotImplementedError('tzwinbase is an abstract base class')
def __eq__(self, other):
# Compare on all relevant dimensions, including name.
if not isinstance(other, tzwinbase):
return NotImplemented
return (self._std_offset == other._std_offset and
self._dst_offset == other._dst_offset and
self._stddayofweek == other._stddayofweek and
self._dstdayofweek == other._dstdayofweek and
self._stdweeknumber == other._stdweeknumber and
self._dstweeknumber == other._dstweeknumber and
self._stdhour == other._stdhour and
self._dsthour == other._dsthour and
self._stdminute == other._stdminute and
self._dstminute == other._dstminute and
self._std_abbr == other._std_abbr and
self._dst_abbr == other._dst_abbr)
@staticmethod
def list():
"""Return a list of all time zones known to the system."""
with winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE) as handle:
with winreg.OpenKey(handle, TZKEYNAME) as tzkey:
result = [winreg.EnumKey(tzkey, i)
for i in range(winreg.QueryInfoKey(tzkey)[0])]
return result
def display(self):
return self._display
def transitions(self, year):
"""
For a given year, get the DST on and off transition times, expressed
always on the standard time side. For zones with no transitions, this
function returns ``None``.
:param year:
The year whose transitions you would like to query.
:return:
Returns a :class:`tuple` of :class:`datetime.datetime` objects,
``(dston, dstoff)`` for zones with an annual DST transition, or
``None`` for fixed offset zones.
"""
if not self.hasdst:
return None
dston = picknthweekday(year, self._dstmonth, self._dstdayofweek,
self._dsthour, self._dstminute,
self._dstweeknumber)
dstoff = picknthweekday(year, self._stdmonth, self._stddayofweek,
self._stdhour, self._stdminute,
self._stdweeknumber)
# Ambiguous dates default to the STD side
dstoff -= self._dst_base_offset
return dston, dstoff
def _get_hasdst(self):
return self._dstmonth != 0
@property
def _dst_base_offset(self):
return self._dst_base_offset_
class tzwin(tzwinbase):
def __init__(self, name):
self._name = name
with winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE) as handle:
tzkeyname = text_type("{kn}\\{name}").format(kn=TZKEYNAME, name=name)
with winreg.OpenKey(handle, tzkeyname) as tzkey:
keydict = valuestodict(tzkey)
self._std_abbr = keydict["Std"]
self._dst_abbr = keydict["Dlt"]
self._display = keydict["Display"]
# See http://ww_winreg.jsiinc.com/SUBA/tip0300/rh0398.htm
tup = struct.unpack("=3l16h", keydict["TZI"])
stdoffset = -tup[0]-tup[1] # Bias + StandardBias * -1
dstoffset = stdoffset-tup[2] # + DaylightBias * -1
self._std_offset = datetime.timedelta(minutes=stdoffset)
self._dst_offset = datetime.timedelta(minutes=dstoffset)
# for the meaning see the win32 TIME_ZONE_INFORMATION structure docs
# http://msdn.microsoft.com/en-us/library/windows/desktop/ms725481(v=vs.85).aspx
(self._stdmonth,
self._stddayofweek, # Sunday = 0
self._stdweeknumber, # Last = 5
self._stdhour,
self._stdminute) = tup[4:9]
(self._dstmonth,
self._dstdayofweek, # Sunday = 0
self._dstweeknumber, # Last = 5
self._dsthour,
self._dstminute) = tup[12:17]
self._dst_base_offset_ = self._dst_offset - self._std_offset
self.hasdst = self._get_hasdst()
def __repr__(self):
return "tzwin(%s)" % repr(self._name)
def __reduce__(self):
return (self.__class__, (self._name,))
class tzwinlocal(tzwinbase):
def __init__(self):
with winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE) as handle:
with winreg.OpenKey(handle, TZLOCALKEYNAME) as tzlocalkey:
keydict = valuestodict(tzlocalkey)
self._std_abbr = keydict["StandardName"]
self._dst_abbr = keydict["DaylightName"]
try:
tzkeyname = text_type('{kn}\\{sn}').format(kn=TZKEYNAME,
sn=self._std_abbr)
with winreg.OpenKey(handle, tzkeyname) as tzkey:
_keydict = valuestodict(tzkey)
self._display = _keydict["Display"]
except OSError:
self._display = None
stdoffset = -keydict["Bias"]-keydict["StandardBias"]
dstoffset = stdoffset-keydict["DaylightBias"]
self._std_offset = datetime.timedelta(minutes=stdoffset)
self._dst_offset = datetime.timedelta(minutes=dstoffset)
# For reasons unclear, in this particular key, the day of week has been
# moved to the END of the SYSTEMTIME structure.
tup = struct.unpack("=8h", keydict["StandardStart"])
(self._stdmonth,
self._stdweeknumber, # Last = 5
self._stdhour,
self._stdminute) = tup[1:5]
self._stddayofweek = tup[7]
tup = struct.unpack("=8h", keydict["DaylightStart"])
(self._dstmonth,
self._dstweeknumber, # Last = 5
self._dsthour,
self._dstminute) = tup[1:5]
self._dstdayofweek = tup[7]
self._dst_base_offset_ = self._dst_offset - self._std_offset
self.hasdst = self._get_hasdst()
def __repr__(self):
return "tzwinlocal()"
def __str__(self):
# str will return the standard name, not the daylight name.
return "tzwinlocal(%s)" % repr(self._std_abbr)
def __reduce__(self):
return (self.__class__, ())
def picknthweekday(year, month, dayofweek, hour, minute, whichweek):
""" dayofweek == 0 means Sunday, whichweek 5 means last instance """
first = datetime.datetime(year, month, 1, hour, minute)
# This will work if dayofweek is ISO weekday (1-7) or Microsoft-style (0-6),
# Because 7 % 7 = 0
weekdayone = first.replace(day=((dayofweek - first.isoweekday()) % 7) + 1)
wd = weekdayone + ((whichweek - 1) * ONEWEEK)
if (wd.month != month):
wd -= ONEWEEK
return wd
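A standalone copy of the helper above for experimenting off Windows (the module itself imports winreg at import time, so it cannot be run elsewhere):

import datetime

ONEWEEK = datetime.timedelta(7)

def picknthweekday(year, month, dayofweek, hour, minute, whichweek):
    """dayofweek == 0 means Sunday, whichweek 5 means last instance."""
    first = datetime.datetime(year, month, 1, hour, minute)
    weekdayone = first.replace(day=((dayofweek - first.isoweekday()) % 7) + 1)
    wd = weekdayone + ((whichweek - 1) * ONEWEEK)
    if wd.month != month:
        wd -= ONEWEEK
    return wd

# Second Sunday of March 2018 at 02:00 (the US DST-on transition):
print(picknthweekday(2018, 3, 0, 2, 0, 2))   # 2018-03-11 02:00:00
# whichweek=5 means "last"; the clamp steps back when week 5 overflows:
print(picknthweekday(2018, 11, 0, 2, 0, 5))  # 2018-11-25 02:00:00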
def valuestodict(key):
"""Convert a registry key's values to a dictionary."""
dout = {}
size = winreg.QueryInfoKey(key)[1]
tz_res = None
for i in range(size):
key_name, value, dtype = winreg.EnumValue(key, i)
if dtype == winreg.REG_DWORD or dtype == winreg.REG_DWORD_LITTLE_ENDIAN:
# If it's a DWORD (32-bit integer), it's stored as unsigned - convert
# that to a proper signed integer
if value & (1 << 31):
value = value - (1 << 32)
elif dtype == winreg.REG_SZ:
# If it's a reference to the tzres DLL, load the actual string
if value.startswith('@tzres'):
tz_res = tz_res or tzres()
value = tz_res.name_from_string(value)
value = value.rstrip('\x00') # Remove trailing nulls
dout[key_name] = value
return dout
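The DWORD branch in ``valuestodict`` reinterprets the registry's unsigned 32-bit storage as signed integers; a minimal sketch of that arithmetic (the helper name is made up for illustration):

def to_signed32(value):
    # Registry DWORDs are unsigned; treat the top bit as a sign bit.
    if value & (1 << 31):
        value -= 1 << 32
    return value

print(to_signed32(0xFFFFFFFF))  # -1
print(to_signed32(300))         # 300, e.g. a Bias of UTC-05:00 in minutes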


@ -1,180 +1,2 @@
# This code was originally contributed by Jeffrey Harris.
import datetime
import struct
import _winreg
__author__ = "Jeffrey Harris & Gustavo Niemeyer <gustavo@niemeyer.net>"
__all__ = ["tzwin", "tzwinlocal"]
ONEWEEK = datetime.timedelta(7)
TZKEYNAMENT = r"SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time Zones"
TZKEYNAME9X = r"SOFTWARE\Microsoft\Windows\CurrentVersion\Time Zones"
TZLOCALKEYNAME = r"SYSTEM\CurrentControlSet\Control\TimeZoneInformation"
def _settzkeyname():
global TZKEYNAME
handle = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE)
try:
_winreg.OpenKey(handle, TZKEYNAMENT).Close()
TZKEYNAME = TZKEYNAMENT
except WindowsError:
TZKEYNAME = TZKEYNAME9X
handle.Close()
_settzkeyname()
class tzwinbase(datetime.tzinfo):
"""tzinfo class based on win32's timezones available in the registry."""
def utcoffset(self, dt):
if self._isdst(dt):
return datetime.timedelta(minutes=self._dstoffset)
else:
return datetime.timedelta(minutes=self._stdoffset)
def dst(self, dt):
if self._isdst(dt):
minutes = self._dstoffset - self._stdoffset
return datetime.timedelta(minutes=minutes)
else:
return datetime.timedelta(0)
def tzname(self, dt):
if self._isdst(dt):
return self._dstname
else:
return self._stdname
def list():
"""Return a list of all time zones known to the system."""
handle = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE)
tzkey = _winreg.OpenKey(handle, TZKEYNAME)
result = [_winreg.EnumKey(tzkey, i)
for i in range(_winreg.QueryInfoKey(tzkey)[0])]
tzkey.Close()
handle.Close()
return result
list = staticmethod(list)
def display(self):
return self._display
def _isdst(self, dt):
dston = picknthweekday(dt.year, self._dstmonth, self._dstdayofweek,
self._dsthour, self._dstminute,
self._dstweeknumber)
dstoff = picknthweekday(dt.year, self._stdmonth, self._stddayofweek,
self._stdhour, self._stdminute,
self._stdweeknumber)
if dston < dstoff:
return dston <= dt.replace(tzinfo=None) < dstoff
else:
return not dstoff <= dt.replace(tzinfo=None) < dston
class tzwin(tzwinbase):
def __init__(self, name):
self._name = name
handle = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE)
tzkey = _winreg.OpenKey(handle, "%s\%s" % (TZKEYNAME, name))
keydict = valuestodict(tzkey)
tzkey.Close()
handle.Close()
self._stdname = keydict["Std"].encode("iso-8859-1")
self._dstname = keydict["Dlt"].encode("iso-8859-1")
self._display = keydict["Display"]
# See http://ww_winreg.jsiinc.com/SUBA/tip0300/rh0398.htm
tup = struct.unpack("=3l16h", keydict["TZI"])
self._stdoffset = -tup[0]-tup[1] # Bias + StandardBias * -1
self._dstoffset = self._stdoffset-tup[2] # + DaylightBias * -1
(self._stdmonth,
self._stddayofweek, # Sunday = 0
self._stdweeknumber, # Last = 5
self._stdhour,
self._stdminute) = tup[4:9]
(self._dstmonth,
self._dstdayofweek, # Sunday = 0
self._dstweeknumber, # Last = 5
self._dsthour,
self._dstminute) = tup[12:17]
def __repr__(self):
return "tzwin(%s)" % repr(self._name)
def __reduce__(self):
return (self.__class__, (self._name,))
class tzwinlocal(tzwinbase):
def __init__(self):
handle = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE)
tzlocalkey = _winreg.OpenKey(handle, TZLOCALKEYNAME)
keydict = valuestodict(tzlocalkey)
tzlocalkey.Close()
self._stdname = keydict["StandardName"].encode("iso-8859-1")
self._dstname = keydict["DaylightName"].encode("iso-8859-1")
try:
tzkey = _winreg.OpenKey(handle, "%s\%s"%(TZKEYNAME, self._stdname))
_keydict = valuestodict(tzkey)
self._display = _keydict["Display"]
tzkey.Close()
except OSError:
self._display = None
handle.Close()
self._stdoffset = -keydict["Bias"]-keydict["StandardBias"]
self._dstoffset = self._stdoffset-keydict["DaylightBias"]
# See http://ww_winreg.jsiinc.com/SUBA/tip0300/rh0398.htm
tup = struct.unpack("=8h", keydict["StandardStart"])
(self._stdmonth,
self._stddayofweek, # Sunday = 0
self._stdweeknumber, # Last = 5
self._stdhour,
self._stdminute) = tup[1:6]
tup = struct.unpack("=8h", keydict["DaylightStart"])
(self._dstmonth,
self._dstdayofweek, # Sunday = 0
self._dstweeknumber, # Last = 5
self._dsthour,
self._dstminute) = tup[1:6]
def __reduce__(self):
return (self.__class__, ())
def picknthweekday(year, month, dayofweek, hour, minute, whichweek):
"""dayofweek == 0 means Sunday, whichweek 5 means last instance"""
first = datetime.datetime(year, month, 1, hour, minute)
weekdayone = first.replace(day=((dayofweek-first.isoweekday())%7+1))
for n in xrange(whichweek):
dt = weekdayone+(whichweek-n)*ONEWEEK
if dt.month == month:
return dt
def valuestodict(key):
"""Convert a registry key's values to a dictionary."""
dict = {}
size = _winreg.QueryInfoKey(key)[1]
for i in range(size):
data = _winreg.EnumValue(key, i)
dict[data[0]] = data[1]
return dict
# tzwin has moved to dateutil.tz.win
from .tz.win import *

71
libs/dateutil/utils.py Normal file

@ -0,0 +1,71 @@
# -*- coding: utf-8 -*-
"""
This module offers general convenience and utility functions for dealing with
datetimes.
.. versionadded:: 2.7.0
"""
from __future__ import unicode_literals
from datetime import datetime, time
def today(tzinfo=None):
"""
Returns a :py:class:`datetime` representing the current day at midnight
:param tzinfo:
The time zone to attach (also used to determine the current day).
:return:
A :py:class:`datetime.datetime` object representing the current day
at midnight.
"""
dt = datetime.now(tzinfo)
return datetime.combine(dt.date(), time(0, tzinfo=tzinfo))
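Hypothetical usage of ``today`` (the tzinfo argument decides whose midnight is returned):

from dateutil import tz, utils

print(utils.today())        # naive midnight in local time
print(utils.today(tz.UTC))  # aware midnight in UTC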
def default_tzinfo(dt, tzinfo):
"""
Sets the ``tzinfo`` parameter on naive datetimes only
This is useful for example when you are provided a datetime that may have
either an implicit or explicit time zone, such as when parsing a time zone
string.
.. doctest::
>>> from dateutil.tz import tzoffset
>>> from dateutil.parser import parse
>>> from dateutil.utils import default_tzinfo
>>> dflt_tz = tzoffset("EST", -18000)
>>> print(default_tzinfo(parse('2014-01-01 12:30 UTC'), dflt_tz))
2014-01-01 12:30:00+00:00
>>> print(default_tzinfo(parse('2014-01-01 12:30'), dflt_tz))
2014-01-01 12:30:00-05:00
:param dt:
The datetime on which to replace the time zone
:param tzinfo:
The :py:class:`datetime.tzinfo` subclass instance to assign to
``dt`` if (and only if) it is naive.
:return:
Returns an aware :py:class:`datetime.datetime`.
"""
if dt.tzinfo is not None:
return dt
else:
return dt.replace(tzinfo=tzinfo)
def within_delta(dt1, dt2, delta):
"""
Useful for comparing two datetimes that may have a negligible difference
and should still be considered equal.
"""
delta = abs(delta)
difference = dt1 - dt2
return -delta <= difference <= delta
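A hypothetical check of ``within_delta`` with a sub-second discrepancy:

from datetime import datetime, timedelta
from dateutil.utils import within_delta

d1 = datetime(2018, 12, 15)
d2 = d1 + timedelta(milliseconds=3)
print(within_delta(d1, d2, timedelta(seconds=1)))       # True
print(within_delta(d1, d2, timedelta(microseconds=1)))  # False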


@ -1,87 +1,167 @@
"""
Copyright (c) 2003-2005 Gustavo Niemeyer <gustavo@niemeyer.net>
# -*- coding: utf-8 -*-
import warnings
import json
This module offers extensions to the standard python 2.3+
datetime module.
"""
from dateutil.tz import tzfile
from tarfile import TarFile
import os
from pkgutil import get_data
from io import BytesIO
__author__ = "Gustavo Niemeyer <gustavo@niemeyer.net>"
__license__ = "PSF License"
from dateutil.tz import tzfile as _tzfile
__all__ = ["setcachesize", "gettz", "rebuild"]
__all__ = ["get_zonefile_instance", "gettz", "gettz_db_metadata"]
CACHE = []
CACHESIZE = 10
ZONEFILENAME = "dateutil-zoneinfo.tar.gz"
METADATA_FN = 'METADATA'
class tzfile(tzfile):
class tzfile(_tzfile):
def __reduce__(self):
return (gettz, (self._filename,))
def getzoneinfofile():
filenames = os.listdir(os.path.join(os.path.dirname(__file__)))
filenames.sort()
filenames.reverse()
for entry in filenames:
if entry.startswith("zoneinfo") and ".tar." in entry:
return os.path.join(os.path.dirname(__file__), entry)
return None
ZONEINFOFILE = getzoneinfofile()
def getzoneinfofile_stream():
try:
return BytesIO(get_data(__name__, ZONEFILENAME))
except IOError as e: # TODO switch to FileNotFoundError?
warnings.warn("I/O error({0}): {1}".format(e.errno, e.strerror))
return None
del getzoneinfofile
def setcachesize(size):
global CACHESIZE, CACHE
CACHESIZE = size
del CACHE[size:]
class ZoneInfoFile(object):
def __init__(self, zonefile_stream=None):
if zonefile_stream is not None:
with TarFile.open(fileobj=zonefile_stream) as tf:
self.zones = {zf.name: tzfile(tf.extractfile(zf), filename=zf.name)
for zf in tf.getmembers()
if zf.isfile() and zf.name != METADATA_FN}
# deal with links: They'll point to their parent object. Less
# waste of memory
links = {zl.name: self.zones[zl.linkname]
for zl in tf.getmembers() if
zl.islnk() or zl.issym()}
self.zones.update(links)
try:
metadata_json = tf.extractfile(tf.getmember(METADATA_FN))
metadata_str = metadata_json.read().decode('UTF-8')
self.metadata = json.loads(metadata_str)
except KeyError:
# no metadata in tar file
self.metadata = None
else:
self.zones = {}
self.metadata = None
def get(self, name, default=None):
"""
Wrapper for :func:`ZoneInfoFile.zones.get`. This is a convenience method
for retrieving zones from the zone dictionary.
:param name:
The name of the zone to retrieve. (Generally IANA zone names)
:param default:
The value to return in the event of a missing key.
.. versionadded:: 2.6.0
"""
return self.zones.get(name, default)
# The current API has gettz as a module function, although in fact it taps into
# a stateful class. So as a workaround for now, without changing the API, we
# will create a new "global" class instance the first time a user requests a
# timezone. Ugly, but adheres to the api.
#
# TODO: Remove after deprecation period.
_CLASS_ZONE_INSTANCE = []
def get_zonefile_instance(new_instance=False):
"""
This is a convenience function which provides a :class:`ZoneInfoFile`
instance using the data provided by the ``dateutil`` package. By default, it
caches a single instance of the ZoneInfoFile object and returns that.
:param new_instance:
If ``True``, a new instance of :class:`ZoneInfoFile` is instantiated and
used as the cached instance for the next call. Otherwise, new instances
are created only as necessary.
:return:
Returns a :class:`ZoneInfoFile` object.
.. versionadded:: 2.6
"""
if new_instance:
zif = None
else:
zif = getattr(get_zonefile_instance, '_cached_instance', None)
if zif is None:
zif = ZoneInfoFile(getzoneinfofile_stream())
get_zonefile_instance._cached_instance = zif
return zif
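Hypothetical usage of the cached accessor, the documented replacement for the deprecated ``gettz`` below (zone availability depends on the bundled tarball):

from dateutil.zoneinfo import get_zonefile_instance

zonefile = get_zonefile_instance()
print(len(zonefile.zones))               # number of bundled IANA zones
perth = zonefile.get('Australia/Perth')  # a tzfile, or None if missing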
def gettz(name):
tzinfo = None
if ZONEINFOFILE:
for cachedname, tzinfo in CACHE:
if cachedname == name:
break
else:
tf = TarFile.open(ZONEINFOFILE)
try:
zonefile = tf.extractfile(name)
except KeyError:
tzinfo = None
else:
tzinfo = tzfile(zonefile)
tf.close()
CACHE.insert(0, (name, tzinfo))
del CACHE[CACHESIZE:]
return tzinfo
"""
This retrieves a time zone from the local zoneinfo tarball that is packaged
with dateutil.
def rebuild(filename, tag=None, format="gz"):
import tempfile, shutil
tmpdir = tempfile.mkdtemp()
zonedir = os.path.join(tmpdir, "zoneinfo")
moduledir = os.path.dirname(__file__)
if tag: tag = "-"+tag
targetname = "zoneinfo%s.tar.%s" % (tag, format)
try:
tf = TarFile.open(filename)
for name in tf.getnames():
if not (name.endswith(".sh") or
name.endswith(".tab") or
name == "leapseconds"):
tf.extract(name, tmpdir)
filepath = os.path.join(tmpdir, name)
os.system("zic -d %s %s" % (zonedir, filepath))
tf.close()
target = os.path.join(moduledir, targetname)
for entry in os.listdir(moduledir):
if entry.startswith("zoneinfo") and ".tar." in entry:
os.unlink(os.path.join(moduledir, entry))
tf = TarFile.open(target, "w:%s" % format)
for entry in os.listdir(zonedir):
entrypath = os.path.join(zonedir, entry)
tf.add(entrypath, entry)
tf.close()
finally:
shutil.rmtree(tmpdir)
:param name:
An IANA-style time zone name, as found in the zoneinfo file.
:return:
Returns a :class:`dateutil.tz.tzfile` time zone object.
.. warning::
It is generally inadvisable to use this function, and it is only
provided for API compatibility with earlier versions. This is *not*
equivalent to ``dateutil.tz.gettz()``, which selects an appropriate
time zone based on the inputs, favoring system zoneinfo. This is ONLY
for accessing the dateutil-specific zoneinfo (which may be out of
date compared to the system zoneinfo).
.. deprecated:: 2.6
If you need to use a specific zoneinfofile over the system zoneinfo,
instantiate a :class:`dateutil.zoneinfo.ZoneInfoFile` object and call
:func:`dateutil.zoneinfo.ZoneInfoFile.get(name)` instead.
Use :func:`get_zonefile_instance` to retrieve an instance of the
dateutil-provided zoneinfo.
"""
warnings.warn("zoneinfo.gettz() will be removed in future versions, "
"to use the dateutil-provided zoneinfo files, instantiate a "
"ZoneInfoFile object and use ZoneInfoFile.zones.get() "
"instead. See the documentation for details.",
DeprecationWarning)
if len(_CLASS_ZONE_INSTANCE) == 0:
_CLASS_ZONE_INSTANCE.append(ZoneInfoFile(getzoneinfofile_stream()))
return _CLASS_ZONE_INSTANCE[0].zones.get(name)
def gettz_db_metadata():
""" Get the zonefile metadata
See `zonefile_metadata`_
:returns:
A dictionary with the database metadata
.. deprecated:: 2.6
See deprecation warning in :func:`zoneinfo.gettz`. To get metadata,
query the attribute ``zoneinfo.ZoneInfoFile.metadata``.
"""
warnings.warn("zoneinfo.gettz_db_metadata() will be removed in future "
"versions, to use the dateutil-provided zoneinfo files, "
"ZoneInfoFile object and query the 'metadata' attribute "
"instead. See the documentation for details.",
DeprecationWarning)
if len(_CLASS_ZONE_INSTANCE) == 0:
_CLASS_ZONE_INSTANCE.append(ZoneInfoFile(getzoneinfofile_stream()))
return _CLASS_ZONE_INSTANCE[0].metadata

Binary file not shown.


@ -0,0 +1,53 @@
import logging
import os
import tempfile
import shutil
import json
from subprocess import check_call
from tarfile import TarFile
from dateutil.zoneinfo import METADATA_FN, ZONEFILENAME
def rebuild(filename, tag=None, format="gz", zonegroups=[], metadata=None):
"""Rebuild the internal timezone info in dateutil/zoneinfo/zoneinfo*tar*
filename is the timezone tarball from ``ftp.iana.org/tz``.
"""
tmpdir = tempfile.mkdtemp()
zonedir = os.path.join(tmpdir, "zoneinfo")
moduledir = os.path.dirname(__file__)
try:
with TarFile.open(filename) as tf:
for name in zonegroups:
tf.extract(name, tmpdir)
filepaths = [os.path.join(tmpdir, n) for n in zonegroups]
try:
check_call(["zic", "-d", zonedir] + filepaths)
except OSError as e:
_print_on_nosuchfile(e)
raise
# write metadata file
with open(os.path.join(zonedir, METADATA_FN), 'w') as f:
json.dump(metadata, f, indent=4, sort_keys=True)
target = os.path.join(moduledir, ZONEFILENAME)
with TarFile.open(target, "w:%s" % format) as tf:
for entry in os.listdir(zonedir):
entrypath = os.path.join(zonedir, entry)
tf.add(entrypath, entry)
finally:
shutil.rmtree(tmpdir)
def _print_on_nosuchfile(e):
"""Print helpful troubleshooting message
e is an exception raised by subprocess.check_call()
"""
if e.errno == 2:
logging.error(
"Could not find zic. Perhaps you need to install "
"libc-bin or some other package that provides it, "
"or it's not in your PATH?")

View file

@ -3,6 +3,12 @@
"""
Extracts as much information as possible from a video file.
"""
from . import monkeypatch as _monkeypatch
from .api import guessit, GuessItApi
from .options import ConfigurationException
from .rules.common.quantity import Size
from .__version__ import __version__
_monkeypatch.monkeypatch_rebulk()
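The public entry points stay importable from the package root, with the rebulk monkeypatch applied as a side effect of the import. A quick sketch of the round trip:

from guessit import guessit

guessit('Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.avi')
# -> ordered dict of properties: {'title': 'Treme', 'season': 1, 'episode': 3, ...}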

View file

@ -12,36 +12,46 @@ import os
import sys
import six
from rebulk.__version__ import __version__ as __rebulk_version__
from guessit import api
from guessit.__version__ import __version__
from guessit.jsonutils import GuessitEncoder
from guessit.options import argument_parser
from rebulk.__version__ import __version__ as __rebulk_version__
from guessit.options import argument_parser, parse_options, load_config, merge_options
try:
from collections import OrderedDict
except ImportError: # pragma: no-cover
from ordereddict import OrderedDict # pylint:disable=import-error
def guess_filename(filename, options):
"""
Guess a single filename using given options
:param filename: filename to parse
:type filename: str
:param options:
:type options: dict
:return:
:rtype:
"""
if not options.yaml and not options.json and not options.show_property:
if not options.get('yaml') and not options.get('json') and not options.get('show_property'):
print('For:', filename)
cmd_options = vars(options)
cmd_options['implicit'] = True # Force implicit option in CLI
guess = api.guessit(filename, options)
guess = api.guessit(filename, vars(options))
if options.show_property:
print(guess.get(options.show_property, ''))
if options.get('show_property'):
print(guess.get(options.get('show_property'), ''))
return
if options.json:
if options.get('json'):
print(json.dumps(guess, cls=GuessitEncoder, ensure_ascii=False))
elif options.yaml:
elif options.get('yaml'):
import yaml
from guessit import yamlutils
ystr = yaml.dump({filename: dict(guess)}, Dumper=yamlutils.CustomDumper, default_flow_style=False,
ystr = yaml.dump({filename: OrderedDict(guess)}, Dumper=yamlutils.CustomDumper, default_flow_style=False,
allow_unicode=True)
i = 0
for yline in ystr.splitlines():
@ -62,15 +72,15 @@ def display_properties(options):
"""
properties = api.properties(options)
if options.json:
if options.values:
if options.get('json'):
if options.get('values'):
print(json.dumps(properties, cls=GuessitEncoder, ensure_ascii=False))
else:
print(json.dumps(list(properties.keys()), cls=GuessitEncoder, ensure_ascii=False))
elif options.yaml:
elif options.get('yaml'):
import yaml
from guessit import yamlutils
if options.values:
if options.get('values'):
print(yaml.dump(properties, Dumper=yamlutils.CustomDumper, default_flow_style=False, allow_unicode=True))
else:
print(yaml.dump(list(properties.keys()), Dumper=yamlutils.CustomDumper, default_flow_style=False,
@ -82,14 +92,14 @@ def display_properties(options):
for property_name in properties_list:
property_values = properties.get(property_name)
print(2 * ' ' + '[+] %s' % (property_name,))
if property_values and options.values:
if property_values and options.get('values'):
for property_value in property_values:
print(4 * ' ' + '[!] %s' % (property_value,))
def main(args=None): # pylint:disable=too-many-branches
def fix_argv_encoding():
"""
Main function for entry point
Fix encoding of sys.argv on windows Python 2
"""
if six.PY2 and os.name == 'nt': # pragma: no cover
# see http://bugs.python.org/issue2128
@ -98,17 +108,28 @@ def main(args=None): # pylint:disable=too-many-branches
for i, j in enumerate(sys.argv):
sys.argv[i] = j.decode(locale.getpreferredencoding())
def main(args=None): # pylint:disable=too-many-branches
"""
Main function for entry point
"""
fix_argv_encoding()
if args is None: # pragma: no cover
options = argument_parser.parse_args()
options = parse_options()
else:
options = argument_parser.parse_args(args)
if options.verbose:
options = parse_options(args)
config = load_config(options)
options = merge_options(config, options)
if options.get('verbose'):
logging.basicConfig(stream=sys.stdout, format='%(message)s')
logging.getLogger().setLevel(logging.DEBUG)
help_required = True
if options.version:
if options.get('version'):
print('+-------------------------------------------------------+')
print('+ GuessIt ' + __version__ + (28 - len(__version__)) * ' ' + '+')
print('+-------------------------------------------------------+')
@ -119,26 +140,26 @@ def main(args=None): # pylint:disable=too-many-branches
print('+-------------------------------------------------------+')
help_required = False
if options.yaml:
if options.get('yaml'):
try:
import yaml # pylint:disable=unused-variable
except ImportError: # pragma: no cover
options.yaml = False
del options['yaml']
print('PyYAML is not installed. \'--yaml\' option will be ignored ...', file=sys.stderr)
if options.properties or options.values:
if options.get('properties') or options.get('values'):
display_properties(options)
help_required = False
filenames = []
if options.filename:
for filename in options.filename:
if options.get('filename'):
for filename in options.get('filename'):
filenames.append(filename)
if options.input_file:
if options.get('input_file'):
if six.PY2:
input_file = open(options.input_file, 'r')
input_file = open(options.get('input_file'), 'r')
else:
input_file = open(options.input_file, 'r', encoding='utf-8')
input_file = open(options.get('input_file'), 'r', encoding='utf-8')
try:
filenames.extend([line.strip() for line in input_file.readlines()])
finally:
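Because parse_options() now yields a plain dict (hence all the options.get(...) lookups above), the CLI entry point can also be exercised programmatically. A small sketch; the filename is made up:

from guessit.__main__ import main

# Equivalent to running: guessit --json Foo.S01E02.720p.HDTV.x264-GRP.mkv
main(['--json', 'Foo.S01E02.720p.HDTV.x264-GRP.mkv'])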

View file

@ -4,4 +4,4 @@
Version module
"""
# pragma: no cover
__version__ = '2.1.1.dev0'
__version__ = '3.0.3'

View file

@ -3,26 +3,28 @@
"""
API functions that can be used by external software
"""
try:
from collections import OrderedDict
except ImportError: # pragma: no-cover
from ordereddict import OrderedDict # pylint:disable=import-error
import os
import traceback
import six
from rebulk.introspector import introspect
from .rules import rebulk_builder
from .options import parse_options
from .__version__ import __version__
from .options import parse_options, load_config, merge_options
from .rules import rebulk_builder
class GuessitException(Exception):
"""
Exception raised when guessit fails to perform a guess because of an internal error.
"""
def __init__(self, string, options):
super(GuessitException, self).__init__("An internal error has occurred in guessit.\n"
"===================== Guessit Exception Report =====================\n"
@ -41,12 +43,27 @@ class GuessitException(Exception):
self.options = options
def configure(options=None, rules_builder=rebulk_builder, force=False):
"""
Load configuration files and initialize rebulk rules if required.
:param options:
:type options: dict
:param rules_builder:
:type rules_builder:
:param force:
:type force: bool
:return:
"""
default_api.configure(options, rules_builder=rules_builder, force=force)
def guessit(string, options=None):
"""
Retrieves all matches from string as a dict
:param string: the filename or release name
:type string: str
:param options: the filename or release name
:param options:
:type options: str|dict
:return:
:rtype:
@ -58,7 +75,7 @@ def properties(options=None):
"""
Retrieves all properties with possible values that can be guessed
:param options:
:type options:
:type options: str|dict
:return:
:rtype:
"""
@ -70,53 +87,113 @@ class GuessItApi(object):
An api class that can be configured with custom Rebulk configuration.
"""
def __init__(self, rebulk):
"""
:param rebulk: Rebulk instance to use.
:type rebulk: Rebulk
:return:
:rtype:
"""
self.rebulk = rebulk
def __init__(self):
"""Default constructor."""
self.rebulk = None
self.config = None
self.load_config_options = None
self.advanced_config = None
@staticmethod
def _fix_option_encoding(value):
@classmethod
def _fix_encoding(cls, value):
if isinstance(value, list):
return [GuessItApi._fix_option_encoding(item) for item in value]
return [cls._fix_encoding(item) for item in value]
if isinstance(value, dict):
return {cls._fix_encoding(k): cls._fix_encoding(v) for k, v in value.items()}
if six.PY2 and isinstance(value, six.text_type):
return value.encode("utf-8")
return value.encode('utf-8')
if six.PY3 and isinstance(value, six.binary_type):
return value.decode('ascii')
return value
def guessit(self, string, options=None):
@classmethod
def _has_same_properties(cls, dic1, dic2, values):
for value in values:
if dic1.get(value) != dic2.get(value):
return False
return True
def configure(self, options=None, rules_builder=rebulk_builder, force=False, sanitize_options=True):
"""
Load configuration files and initialize rebulk rules if required.
:param options:
:type options: str|dict
:param rules_builder:
:type rules_builder:
:param force:
:type force: bool
:return:
:rtype: dict
"""
if sanitize_options:
options = parse_options(options, True)
options = self._fix_encoding(options)
if self.config is None or self.load_config_options is None or force or \
not self._has_same_properties(self.load_config_options,
options,
['config', 'no_user_config', 'no_default_config']):
config = load_config(options)
config = self._fix_encoding(config)
self.load_config_options = options
else:
config = self.config
advanced_config = merge_options(config.get('advanced_config'), options.get('advanced_config'))
should_build_rebulk = force or not self.rebulk or not self.advanced_config or \
self.advanced_config != advanced_config
if should_build_rebulk:
self.advanced_config = advanced_config
self.rebulk = rules_builder(advanced_config)
self.config = config
return self.config
def guessit(self, string, options=None): # pylint: disable=too-many-branches
"""
Retrieves all matches from string as a dict
:param string: the filename or release name
:type string: str
:param options: the filename or release name
:type string: str|Path
:param options:
:type options: str|dict
:return:
:rtype:
"""
try:
options = parse_options(options)
from pathlib import Path
if isinstance(string, Path):
try:
# Handle path-like object
string = os.fspath(string)
except AttributeError:
string = str(string)
except ImportError:
pass
try:
options = parse_options(options, True)
options = self._fix_encoding(options)
config = self.configure(options, sanitize_options=False)
options = merge_options(config, options)
result_decode = False
result_encode = False
fixed_options = {}
for (key, value) in options.items():
key = GuessItApi._fix_option_encoding(key)
value = GuessItApi._fix_option_encoding(value)
fixed_options[key] = value
options = fixed_options
if six.PY2:
if isinstance(string, six.text_type):
string = string.encode("utf-8")
result_decode = True
elif isinstance(string, six.binary_type):
string = six.binary_type(string)
if six.PY3:
if isinstance(string, six.binary_type):
string = string.decode('ascii')
result_encode = True
elif isinstance(string, six.text_type):
string = six.text_type(string)
if six.PY2 and isinstance(string, six.text_type):
string = string.encode("utf-8")
result_decode = True
if six.PY3 and isinstance(string, six.binary_type):
string = string.decode('ascii')
result_encode = True
matches = self.rebulk.matches(string, options)
if result_decode:
for match in matches:
@ -126,7 +203,8 @@ class GuessItApi(object):
for match in matches:
if isinstance(match.value, six.text_type):
match.value = match.value.encode("ascii")
return matches.to_dict(options.get('advanced', False), options.get('implicit', False))
return matches.to_dict(options.get('advanced', False), options.get('single_value', False),
options.get('enforce_list', False))
except:
raise GuessitException(string, options)
@ -138,6 +216,10 @@ class GuessItApi(object):
:return:
:rtype:
"""
options = parse_options(options, True)
options = self._fix_encoding(options)
config = self.configure(options, sanitize_options=False)
options = merge_options(config, options)
unordered = introspect(self.rebulk, options).properties
ordered = OrderedDict()
for k in sorted(unordered.keys(), key=six.text_type):
@ -147,4 +229,4 @@ class GuessItApi(object):
return ordered
default_api = GuessItApi(rebulk_builder())
default_api = GuessItApi()
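Taken together, the new configure()/guessit() flow looks roughly like the sketch below. single_value is the new option forwarded to to_dict() above, and Path support relies on the pathlib branch at the top of guessit():

from pathlib import Path
from guessit import api

api.configure()  # merges configuration files and builds the Rebulk rules once
print(api.guessit(Path('Movie.2018.1080p.BluRay.x264-GRP.mkv')))
print(api.guessit('Show.S01E02.mkv', options={'single_value': True}))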

View file

@ -0,0 +1,362 @@
{
"expected_title": [
"OSS 117"
],
"allowed_countries": [
"au",
"us",
"gb"
],
"allowed_languages": [
"de",
"en",
"es",
"ca",
"cs",
"fr",
"he",
"hi",
"hu",
"it",
"ja",
"ko",
"nl",
"pl",
"pt",
"ro",
"ru",
"sv",
"te",
"uk",
"mul",
"und"
],
"advanced_config": {
"common_words": [
"de",
"it"
],
"groups": {
"starting": "([{",
"ending": ")]}"
},
"container": {
"subtitles": [
"srt",
"idx",
"sub",
"ssa",
"ass"
],
"info": [
"nfo"
],
"videos": [
"3g2",
"3gp",
"3gp2",
"asf",
"avi",
"divx",
"flv",
"mk3d",
"m4v",
"mk2",
"mka",
"mkv",
"mov",
"mp4",
"mp4a",
"mpeg",
"mpg",
"ogg",
"ogm",
"ogv",
"qt",
"ra",
"ram",
"rm",
"ts",
"wav",
"webm",
"wma",
"wmv",
"iso",
"vob"
],
"torrent": [
"torrent"
],
"nzb": [
"nzb"
]
},
"country": {
"synonyms": {
"ES": [
"españa"
],
"GB": [
"UK"
],
"BR": [
"brazilian",
"bra"
],
"CA": [
"québec",
"quebec",
"qc"
],
"MX": [
"Latinoamérica",
"latin america"
]
}
},
"episodes": {
"season_max_range": 100,
"episode_max_range": 100,
"max_range_gap": 1,
"season_markers": [
"s"
],
"season_ep_markers": [
"x"
],
"disc_markers": [
"d"
],
"episode_markers": [
"xe",
"ex",
"ep",
"e",
"x"
],
"range_separators": [
"-",
"~",
"to",
"a"
],
"discrete_separators": [
"+",
"&",
"and",
"et"
],
"season_words": [
"season",
"saison",
"seizoen",
"seasons",
"saisons",
"tem",
"temp",
"temporada",
"temporadas",
"stagione"
],
"episode_words": [
"episode",
"episodes",
"eps",
"ep",
"episodio",
"episodios",
"capitulo",
"capitulos"
],
"of_words": [
"of",
"sur"
],
"all_words": [
"All"
]
},
"language": {
"synonyms": {
"ell": [
"gr",
"greek"
],
"spa": [
"esp",
"español",
"espanol"
],
"fra": [
"français",
"vf",
"vff",
"vfi",
"vfq"
],
"swe": [
"se"
],
"por_BR": [
"po",
"pb",
"pob",
"ptbr",
"br",
"brazilian"
],
"deu_CH": [
"swissgerman",
"swiss german"
],
"nld_BE": [
"flemish"
],
"cat": [
"català",
"castellano",
"espanol castellano",
"español castellano"
],
"ces": [
"cz"
],
"ukr": [
"ua"
],
"zho": [
"cn"
],
"jpn": [
"jp"
],
"hrv": [
"scr"
],
"mul": [
"multi",
"dl"
]
},
"subtitle_affixes": [
"sub",
"subs",
"esub",
"esubs",
"subbed",
"custom subbed",
"custom subs",
"custom sub",
"customsubbed",
"customsubs",
"customsub",
"soft subtitles",
"soft subs"
],
"subtitle_prefixes": [
"st",
"v",
"vost",
"subforced",
"fansub",
"hardsub",
"legenda",
"legendas",
"legendado",
"subtitulado",
"soft",
"subtitles"
],
"subtitle_suffixes": [
"subforced",
"fansub",
"hardsub"
],
"language_affixes": [
"dublado",
"dubbed",
"dub"
],
"language_prefixes": [
"true"
],
"language_suffixes": [
"audio"
],
"weak_affixes": [
"v",
"audio",
"true"
]
},
"part": {
"prefixes": [
"pt",
"part"
]
},
"release_group": {
"forbidden_names": [
"rip",
"by",
"for",
"par",
"pour",
"bonus"
],
"ignored_seps": "[]{}()"
},
"screen_size": {
"frame_rates": [
"23.976",
"24",
"25",
"30",
"48",
"50",
"60",
"120"
],
"min_ar": 1.333,
"max_ar": 1.898,
"interlaced": [
"360",
"480",
"576",
"900",
"1080"
],
"progressive": [
"360",
"480",
"576",
"900",
"1080",
"368",
"720",
"1440",
"2160",
"4320"
]
},
"website": {
"safe_tlds": [
"com",
"org",
"net"
],
"safe_subdomains": [
"www"
],
"safe_prefixes": [
"co",
"com",
"org",
"net"
],
"prefixes": [
"from"
]
}
}
}

View file

@ -4,14 +4,10 @@
JSON Utils
"""
import json
try:
from collections import OrderedDict
except ImportError: # pragma: no-cover
from ordereddict import OrderedDict # pylint:disable=import-error
from six import text_type
from rebulk.match import Match
class GuessitEncoder(json.JSONEncoder):
"""
JSON Encoder for guessit response
@ -19,14 +15,8 @@ class GuessitEncoder(json.JSONEncoder):
def default(self, o): # pylint:disable=method-hidden
if isinstance(o, Match):
ret = OrderedDict()
ret['value'] = o.value
if o.raw:
ret['raw'] = o.raw
ret['start'] = o.start
ret['end'] = o.end
return ret
elif hasattr(o, 'name'): # Babelfish languages/countries long name
return str(o.name)
else: # pragma: no cover
return str(o)
return o.advanced
if hasattr(o, 'name'): # Babelfish languages/countries long name
return text_type(o.name)
# pragma: no cover
return text_type(o)

View file

@ -0,0 +1,34 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Monkeypatch initialisation functions
"""
try:
from collections import OrderedDict
except ImportError: # pragma: no-cover
from ordereddict import OrderedDict # pylint:disable=import-error
from rebulk.match import Match
def monkeypatch_rebulk():
"""Monkeypatch rebulk classes"""
@property
def match_advanced(self):
"""
Build advanced dict from match
:param self:
:return:
"""
ret = OrderedDict()
ret['value'] = self.value
if self.raw:
ret['raw'] = self.raw
ret['start'] = self.start
ret['end'] = self.end
return ret
Match.advanced = match_advanced
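After monkeypatch_rebulk() runs (it is invoked on package import, see __init__.py above), every rebulk Match exposes the advanced dict that the JSON encoder consumes. A hedged sketch of what that buys:

from guessit import guessit

# With advanced output, each property value is a Match object whose
# .advanced property yields OrderedDict([('value', ...), ('raw', ...),
# ('start', ...), ('end', ...)]) -- exact contents depend on the input.
result = guessit('Foo.S01E02.mkv', {'advanced': True})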

View file

@ -3,9 +3,14 @@
"""
Options
"""
from argparse import ArgumentParser
import copy
import json
import os
import pkgutil
import shlex
from argparse import ArgumentParser
import six
@ -21,68 +26,270 @@ def build_argument_parser():
naming_opts = opts.add_argument_group("Naming")
naming_opts.add_argument('-t', '--type', dest='type', default=None,
help='The suggested file type: movie, episode. If undefined, type will be guessed.')
naming_opts.add_argument('-n', '--name-only', dest='name_only', action='store_true', default=False,
naming_opts.add_argument('-n', '--name-only', dest='name_only', action='store_true', default=None,
help='Parse files as name only, considering "/" and "\\" like other separators.')
naming_opts.add_argument('-Y', '--date-year-first', action='store_true', dest='date_year_first', default=None,
help='If short date is found, consider the first digits as the year.')
naming_opts.add_argument('-D', '--date-day-first', action='store_true', dest='date_day_first', default=None,
help='If short date is found, consider the second digits as the day.')
naming_opts.add_argument('-L', '--allowed-languages', action='append', dest='allowed_languages',
naming_opts.add_argument('-L', '--allowed-languages', action='append', dest='allowed_languages', default=None,
help='Allowed language (can be used multiple times)')
naming_opts.add_argument('-C', '--allowed-countries', action='append', dest='allowed_countries',
naming_opts.add_argument('-C', '--allowed-countries', action='append', dest='allowed_countries', default=None,
help='Allowed country (can be used multiple times)')
naming_opts.add_argument('-E', '--episode-prefer-number', action='store_true', dest='episode_prefer_number',
default=False,
default=None,
help='Guess "serie.213.avi" as the episode 213. Without this option, '
'it will be guessed as season 2, episode 13')
naming_opts.add_argument('-T', '--expected-title', action='append', dest='expected_title',
naming_opts.add_argument('-T', '--expected-title', action='append', dest='expected_title', default=None,
help='Expected title to parse (can be used multiple times)')
naming_opts.add_argument('-G', '--expected-group', action='append', dest='expected_group',
naming_opts.add_argument('-G', '--expected-group', action='append', dest='expected_group', default=None,
help='Expected release group (can be used multiple times)')
naming_opts.add_argument('--includes', action='append', dest='includes', default=None,
help='List of properties to be detected')
naming_opts.add_argument('--excludes', action='append', dest='excludes', default=None,
help='List of properties to be ignored')
input_opts = opts.add_argument_group("Input")
input_opts.add_argument('-f', '--input-file', dest='input_file', default=False,
input_opts.add_argument('-f', '--input-file', dest='input_file', default=None,
help='Read filenames from an input text file. File should use UTF-8 charset.')
output_opts = opts.add_argument_group("Output")
output_opts.add_argument('-v', '--verbose', action='store_true', dest='verbose', default=False,
output_opts.add_argument('-v', '--verbose', action='store_true', dest='verbose', default=None,
help='Display debug output')
output_opts.add_argument('-P', '--show-property', dest='show_property', default=None,
help='Display the value of a single property (title, series, video_codec, year, ...)')
output_opts.add_argument('-a', '--advanced', dest='advanced', action='store_true', default=False,
output_opts.add_argument('-a', '--advanced', dest='advanced', action='store_true', default=None,
help='Display advanced information for filename guesses, as json output')
output_opts.add_argument('-j', '--json', dest='json', action='store_true', default=False,
output_opts.add_argument('-s', '--single-value', dest='single_value', action='store_true', default=None,
help='Keep only first value found for each property')
output_opts.add_argument('-l', '--enforce-list', dest='enforce_list', action='store_true', default=None,
help='Wrap each found value in a list even when property has a single value')
output_opts.add_argument('-j', '--json', dest='json', action='store_true', default=None,
help='Display information for filename guesses as json output')
output_opts.add_argument('-y', '--yaml', dest='yaml', action='store_true', default=False,
output_opts.add_argument('-y', '--yaml', dest='yaml', action='store_true', default=None,
help='Display information for filename guesses as yaml output')
conf_opts = opts.add_argument_group("Configuration")
conf_opts.add_argument('-c', '--config', dest='config', action='append', default=None,
help='Filepath to configuration file. Configuration file contains the same '
'options as those from command line options, but option names have "-" characters '
'replaced with "_". This configuration will be merged with default and user '
'configuration files.')
conf_opts.add_argument('--no-user-config', dest='no_user_config', action='store_true',
default=None,
help='Disable user configuration. If not defined, guessit tries to read configuration files '
'at ~/.guessit/options.(json|yml|yaml) and ~/.config/guessit/options.(json|yml|yaml)')
conf_opts.add_argument('--no-default-config', dest='no_default_config', action='store_true',
default=None,
help='Disable default configuration. This should be done only if you are providing a full '
'configuration through user configuration or --config option. If no "advanced_config" '
'is provided by another configuration file, it will still be loaded from default '
'configuration.')
information_opts = opts.add_argument_group("Information")
information_opts.add_argument('-p', '--properties', dest='properties', action='store_true', default=False,
information_opts.add_argument('-p', '--properties', dest='properties', action='store_true', default=None,
help='Display properties that can be guessed.')
information_opts.add_argument('-V', '--values', dest='values', action='store_true', default=False,
information_opts.add_argument('-V', '--values', dest='values', action='store_true', default=None,
help='Display property values that can be guessed.')
information_opts.add_argument('--version', dest='version', action='store_true', default=False,
information_opts.add_argument('--version', dest='version', action='store_true', default=None,
help='Display the guessit version.')
return opts
def parse_options(options):
def parse_options(options=None, api=False):
"""
Parse given option string
:param options:
:type options:
:param api:
:type api: boolean
:return:
:rtype:
"""
if isinstance(options, six.string_types):
args = shlex.split(options)
options = vars(argument_parser.parse_args(args))
if options is None:
options = {}
elif options is None:
if api:
options = {}
else:
options = vars(argument_parser.parse_args())
elif not isinstance(options, dict):
options = vars(argument_parser.parse_args(options))
return options
argument_parser = build_argument_parser()
class ConfigurationException(Exception):
"""
Exception related to configuration file.
"""
pass
def load_config(options):
"""
Load options from configuration files, if defined and present.
:param options:
:type options:
:return:
:rtype:
"""
configurations = []
if not options.get('no_default_config'):
default_options_data = pkgutil.get_data('guessit', 'config/options.json').decode('utf-8')
default_options = json.loads(default_options_data)
configurations.append(default_options)
config_files = []
if not options.get('no_user_config'):
home_directory = os.path.expanduser("~")
cwd = os.getcwd()
yaml_supported = False
try:
import yaml # pylint: disable=unused-variable
yaml_supported = True
except ImportError:
pass
config_file_locations = get_options_file_locations(home_directory, cwd, yaml_supported)
config_files = [f for f in config_file_locations if os.path.exists(f)]
custom_config_files = options.get('config')
if custom_config_files:
config_files = config_files + custom_config_files
for config_file in config_files:
config_file_options = load_config_file(config_file)
if config_file_options:
configurations.append(config_file_options)
config = {}
if configurations:
config = merge_options(*configurations)
if 'advanced_config' not in config:
# Guessit doesn't work without advanced_config, so we use the default if no configuration file provides it.
default_options_data = pkgutil.get_data('guessit', 'config/options.json').decode('utf-8')
default_options = json.loads(default_options_data)
config['advanced_config'] = default_options['advanced_config']
return config
def merge_options(*options):
"""
Merge options into a single options dict.
:param options:
:type options:
:return:
:rtype:
"""
merged = {}
if options:
if options[0]:
merged.update(copy.deepcopy(options[0]))
for options in options[1:]:
if options:
pristine = options.get('pristine')
if pristine is True:
merged = {}
elif pristine:
for to_reset in pristine:
if to_reset in merged:
del merged[to_reset]
for (option, value) in options.items():
merge_option_value(option, value, merged)
return merged
def merge_option_value(option, value, merged):
"""
Merge option value
:param option:
:param value:
:param merged:
:return:
"""
if value is not None and option != 'pristine':
if option in merged.keys() and isinstance(merged[option], list):
for val in value:
if val not in merged[option]:
merged[option].append(val)
elif option in merged.keys() and isinstance(merged[option], dict):
merged[option] = merge_options(merged[option], value)
elif isinstance(value, list):
merged[option] = list(value)
else:
merged[option] = value
def load_config_file(filepath):
"""
Load a configuration as an options dict.
Format of the file is given with filepath extension.
:param filepath:
:type filepath:
:return:
:rtype:
"""
if filepath.endswith('.json'):
with open(filepath) as config_file_data:
return json.load(config_file_data)
if filepath.endswith('.yaml') or filepath.endswith('.yml'):
try:
import yaml
with open(filepath) as config_file_data:
return yaml.load(config_file_data)
except ImportError: # pragma: no cover
raise ConfigurationException('Configuration file extension is not supported. '
'PyYAML should be installed to support "%s" file' % (
filepath,))
try:
# Try to load input as JSON
return json.loads(filepath)
except: # pylint: disable=bare-except
pass
raise ConfigurationException('Configuration file extension is not supported for "%s" file.' % (filepath,))
def get_options_file_locations(homedir, cwd, yaml_supported=False):
"""
Get all possible locations for options file.
:param homedir: user home directory
:type homedir: basestring
:param cwd: current working directory
:type cwd: basestring
:param yaml_supported: whether YAML configuration files can be loaded
:type yaml_supported: bool
:return:
:rtype: list
"""
locations = []
configdirs = [(os.path.join(homedir, '.guessit'), 'options'),
(os.path.join(homedir, '.config', 'guessit'), 'options'),
(cwd, 'guessit.options')]
configexts = ['json']
if yaml_supported:
configexts.append('yaml')
configexts.append('yml')
for configdir in configdirs:
for configext in configexts:
locations.append(os.path.join(configdir[0], configdir[1] + '.' + configext))
return locations
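Pulling the new helpers together: parse_options() normalizes strings/argv/dicts into one dict, load_config() stacks the default, user and --config files, and merge_options() folds them with later values winning and list options unioned. A sketch:

from guessit.options import parse_options, load_config, merge_options

cli = parse_options('-t episode "Foo.S01E02.mkv"')  # shlex + argparse -> dict
config = load_config(cli)   # always carries at least advanced_config
options = merge_options(config, cli)
assert options['type'] == 'episode'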

View file

@ -10,7 +10,7 @@ from .markers.groups import groups
from .properties.episodes import episodes
from .properties.container import container
from .properties.format import format_
from .properties.source import source
from .properties.video_codec import video_codec
from .properties.audio_codec import audio_codec
from .properties.screen_size import screen_size
@ -21,7 +21,10 @@ from .properties.episode_title import episode_title
from .properties.language import language
from .properties.country import country
from .properties.release_group import release_group
from .properties.streaming_service import streaming_service
from .properties.other import other
from .properties.size import size
from .properties.bit_rate import bit_rate
from .properties.edition import edition
from .properties.cds import cds
from .properties.bonus import bonus
@ -34,42 +37,50 @@ from .properties.type import type_
from .processors import processors
def rebulk_builder():
def rebulk_builder(config):
"""
Default builder for main Rebulk object used by api.
:return: Main Rebulk object
:rtype: Rebulk
"""
def _config(name):
return config.get(name, {})
rebulk = Rebulk()
rebulk.rebulk(path())
rebulk.rebulk(groups())
common_words = frozenset(_config('common_words'))
rebulk.rebulk(episodes())
rebulk.rebulk(container())
rebulk.rebulk(format_())
rebulk.rebulk(video_codec())
rebulk.rebulk(audio_codec())
rebulk.rebulk(screen_size())
rebulk.rebulk(website())
rebulk.rebulk(date())
rebulk.rebulk(title())
rebulk.rebulk(episode_title())
rebulk.rebulk(language())
rebulk.rebulk(country())
rebulk.rebulk(release_group())
rebulk.rebulk(other())
rebulk.rebulk(edition())
rebulk.rebulk(cds())
rebulk.rebulk(bonus())
rebulk.rebulk(film())
rebulk.rebulk(part())
rebulk.rebulk(crc())
rebulk.rebulk(path(_config('path')))
rebulk.rebulk(groups(_config('groups')))
rebulk.rebulk(processors())
rebulk.rebulk(episodes(_config('episodes')))
rebulk.rebulk(container(_config('container')))
rebulk.rebulk(source(_config('source')))
rebulk.rebulk(video_codec(_config('video_codec')))
rebulk.rebulk(audio_codec(_config('audio_codec')))
rebulk.rebulk(screen_size(_config('screen_size')))
rebulk.rebulk(website(_config('website')))
rebulk.rebulk(date(_config('date')))
rebulk.rebulk(title(_config('title')))
rebulk.rebulk(episode_title(_config('episode_title')))
rebulk.rebulk(language(_config('language'), common_words))
rebulk.rebulk(country(_config('country'), common_words))
rebulk.rebulk(release_group(_config('release_group')))
rebulk.rebulk(streaming_service(_config('streaming_service')))
rebulk.rebulk(other(_config('other')))
rebulk.rebulk(size(_config('size')))
rebulk.rebulk(bit_rate(_config('bit_rate')))
rebulk.rebulk(edition(_config('edition')))
rebulk.rebulk(cds(_config('cds')))
rebulk.rebulk(bonus(_config('bonus')))
rebulk.rebulk(film(_config('film')))
rebulk.rebulk(part(_config('part')))
rebulk.rebulk(crc(_config('crc')))
rebulk.rebulk(mimetype())
rebulk.rebulk(type_())
rebulk.rebulk(processors(_config('processors')))
rebulk.rebulk(mimetype(_config('mimetype')))
rebulk.rebulk(type_(_config('type')))
def customize_properties(properties):
"""

View file

@ -6,6 +6,7 @@ Common module
import re
seps = r' [](){}+*|=-_~#/\\.,;:' # list of tags/words separators
seps_no_groups = seps.replace('[](){}', '')
seps_no_fs = seps.replace('/', '').replace('\\', '')
title_seps = r'-+/\|' # separators for title

View file

@ -13,22 +13,26 @@ def marker_comparator_predicate(match):
"""
Match predicate used in comparator
"""
return not match.private and \
match.name not in ['proper_count', 'title', 'episode_title', 'alternative_title'] and \
not (match.name == 'container' and 'extension' in match.tags)
return (
not match.private
and match.name not in ('proper_count', 'title')
and not (match.name == 'container' and 'extension' in match.tags)
and not (match.name == 'other' and match.value == 'Rip')
)
def marker_weight(matches, marker):
def marker_weight(matches, marker, predicate):
"""
Compute the comparator weight of a marker
:param matches:
:param marker:
:param predicate:
:return:
"""
return len(set(match.name for match in matches.range(*marker.span, predicate=marker_comparator_predicate)))
return len(set(match.name for match in matches.range(*marker.span, predicate=predicate)))
def marker_comparator(matches, markers):
def marker_comparator(matches, markers, predicate):
"""
Builds a comparator that returns markers sorted from the most valuable to the least.
@ -36,33 +40,36 @@ def marker_comparator(matches, markers):
:param matches:
:type matches:
:param markers:
:param predicate:
:return:
:rtype:
"""
def comparator(marker1, marker2):
"""
The actual comparator function.
"""
matches_count = marker_weight(matches, marker2) - marker_weight(matches, marker1)
matches_count = marker_weight(matches, marker2, predicate) - marker_weight(matches, marker1, predicate)
if matches_count:
return matches_count
len_diff = len(marker2) - len(marker1)
if len_diff:
return len_diff
# give preference to rightmost path
return markers.index(marker2) - markers.index(marker1)
return comparator
def marker_sorted(markers, matches):
def marker_sorted(markers, matches, predicate=marker_comparator_predicate):
"""
Sort markers from matches, from the most valuable to the least.
:param markers:
:type markers:
:param matches:
:type matches:
:param predicate:
:return:
:rtype:
"""
return sorted(markers, key=cmp_to_key(marker_comparator(matches, markers)))
return sorted(markers, key=cmp_to_key(marker_comparator(matches, markers, predicate=predicate)))

View file

@ -42,7 +42,7 @@ def _is_int(string):
return False
def _guess_day_first_parameter(groups):
def _guess_day_first_parameter(groups): # pylint:disable=inconsistent-return-statements
"""
If day_first is not defined, use some heuristic to fix it.
It helps to solve issues with the python dateutil 2.5.3 parser changes.
@ -57,17 +57,17 @@ def _guess_day_first_parameter(groups):
if _is_int(groups[0]) and valid_year(int(groups[0][:4])):
return False
# If match ends with a long year, the day_first is forced to true.
elif _is_int(groups[-1]) and valid_year(int(groups[-1][-4:])):
if _is_int(groups[-1]) and valid_year(int(groups[-1][-4:])):
return True
# If match starts with a short year, then day_first is forced to false.
elif _is_int(groups[0]) and int(groups[0][:2]) > 31:
if _is_int(groups[0]) and int(groups[0][:2]) > 31:
return False
# If match ends with a short year, then day_first is forced to true.
elif _is_int(groups[-1]) and int(groups[-1][-2:]) > 31:
if _is_int(groups[-1]) and int(groups[-1][-2:]) > 31:
return True
def search_date(string, year_first=None, day_first=None):
def search_date(string, year_first=None, day_first=None): # pylint:disable=inconsistent-return-statements
"""Looks for date patterns, and if found return the date and group span.
Assumes there are sentinels at the beginning and end of the string that
@ -84,42 +84,42 @@ def search_date(string, year_first=None, day_first=None):
>>> search_date(' no date in here ')
"""
start, end = None, None
match = None
groups = None
for date_re in date_regexps:
search_match = date_re.search(string)
if search_match and (match is None or search_match.end() - search_match.start() > len(match)):
start, end = search_match.start(1), search_match.end(1)
groups = search_match.groups()[1:]
match = '-'.join(groups)
if not search_match:
continue
if match is None:
return
start, end = search_match.start(1), search_match.end(1)
groups = search_match.groups()[1:]
match = '-'.join(groups)
if year_first and day_first is None:
day_first = False
if match is None:
continue
if day_first is None:
day_first = _guess_day_first_parameter(groups)
if year_first and day_first is None:
day_first = False
# If day_first/year_first is undefined, parse is made using both possible values.
yearfirst_opts = [False, True]
if year_first is not None:
yearfirst_opts = [year_first]
if day_first is None:
day_first = _guess_day_first_parameter(groups)
dayfirst_opts = [True, False]
if day_first is not None:
dayfirst_opts = [day_first]
# If day_first/year_first is undefined, parse is made using both possible values.
yearfirst_opts = [False, True]
if year_first is not None:
yearfirst_opts = [year_first]
kwargs_list = ({'dayfirst': d, 'yearfirst': y} for d in dayfirst_opts for y in yearfirst_opts)
for kwargs in kwargs_list:
try:
date = parser.parse(match, **kwargs)
except (ValueError, TypeError): # pragma: no cover
# see https://bugs.launchpad.net/dateutil/+bug/1247643
date = None
dayfirst_opts = [True, False]
if day_first is not None:
dayfirst_opts = [day_first]
# check date plausibility
if date and valid_year(date.year): # pylint:disable=no-member
return start, end, date.date() # pylint:disable=no-member
kwargs_list = ({'dayfirst': d, 'yearfirst': y}
for d in dayfirst_opts for y in yearfirst_opts)
for kwargs in kwargs_list:
try:
date = parser.parse(match, **kwargs)
except (ValueError, TypeError): # pragma: no cover
# see https://bugs.launchpad.net/dateutil/+bug/1247643
date = None
# check date plausibility
if date and valid_year(date.year): # pylint:disable=no-member
return start, end, date.date() # pylint:disable=no-member

View file

@ -0,0 +1,53 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Expected property factory
"""
import re
from rebulk import Rebulk
from rebulk.utils import find_all
from . import dash, seps
def build_expected_function(context_key):
"""
Creates an expected property function
:param context_key:
:type context_key:
:return:
:rtype:
"""
def expected(input_string, context):
"""
Expected property functional pattern.
:param input_string:
:type input_string:
:param context:
:type context:
:return:
:rtype:
"""
ret = []
for search in context.get(context_key):
if search.startswith('re:'):
search = search[3:]
search = search.replace(' ', '-')
matches = Rebulk().regex(search, abbreviations=[dash], flags=re.IGNORECASE) \
.matches(input_string, context)
for match in matches:
ret.append(match.span)
else:
value = search
for sep in seps:
input_string = input_string.replace(sep, ' ')
search = search.replace(sep, ' ')
for start in find_all(input_string, search, ignore_case=True):
ret.append({'start': start, 'end': start + len(search), 'value': value})
return ret
return expected
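The generated function is meant to be registered as a rebulk functional pattern, but calling it directly shows the contract. This sketch assumes the 'OSS 117' expected title shipped in the bundled options.json:

from guessit.rules.common.expected import build_expected_function

expected_title = build_expected_function('expected_title')
expected_title('OSS.117.Cairo.Nest.of.Spies.2006.mkv',
               {'expected_title': ['OSS 117']})
# -> [{'start': 0, 'end': 7, 'value': 'OSS 117'}]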

View file

@ -25,7 +25,7 @@ def _potential_before(i, input_string):
:return:
:rtype: bool
"""
return i - 2 >= 0 and input_string[i] == input_string[i - 2] and input_string[i - 1] not in seps
return i - 2 >= 0 and input_string[i] in seps and input_string[i - 2] in seps and input_string[i - 1] not in seps
def _potential_after(i, input_string):

View file

@ -0,0 +1,27 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Pattern utility functions
"""
def is_disabled(context, name):
"""Whether a specific pattern is disabled.
The context object might define an inclusion list (includes) or an exclusion list (excludes).
A pattern is considered disabled if it is found in the exclusion list, or
if an inclusion list is defined and the pattern is not found in it.
:param context:
:param name:
:return:
"""
if not context:
return False
excludes = context.get('excludes')
if excludes and name in excludes:
return True
includes = context.get('includes')
return includes and name not in includes
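In other words: excludes always wins, includes acts as an allow-list when present, and an empty context disables nothing:

from guessit.rules.common.pattern import is_disabled

is_disabled({'excludes': ['audio_codec']}, 'audio_codec')  # True
is_disabled({'includes': ['title']}, 'audio_codec')        # True (not allow-listed)
is_disabled({'includes': ['title']}, 'title')              # False
is_disabled({}, 'audio_codec')                             # False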

View file

@ -0,0 +1,106 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Quantities: Size
"""
import re
from abc import abstractmethod
import six
from ..common import seps
class Quantity(object):
"""
Represent a quantity object with magnitude and units.
"""
parser_re = re.compile(r'(?P<magnitude>\d+(?:[.]\d+)?)(?P<units>[^\d]+)?')
def __init__(self, magnitude, units):
self.magnitude = magnitude
self.units = units
@classmethod
@abstractmethod
def parse_units(cls, value):
"""
Parse a string to a proper unit notation.
"""
raise NotImplementedError
@classmethod
def fromstring(cls, string):
"""
Parse the string into a quantity object.
:param string:
:return:
"""
values = cls.parser_re.match(string).groupdict()
try:
magnitude = int(values['magnitude'])
except ValueError:
magnitude = float(values['magnitude'])
units = cls.parse_units(values['units'])
return cls(magnitude, units)
def __hash__(self):
return hash(str(self))
def __eq__(self, other):
if isinstance(other, six.string_types):
return str(self) == other
if not isinstance(other, self.__class__):
return NotImplemented
return self.magnitude == other.magnitude and self.units == other.units
def __ne__(self, other):
return not self == other
def __repr__(self):
return '<{0} [{1}]>'.format(self.__class__.__name__, self)
def __str__(self):
return '{0}{1}'.format(self.magnitude, self.units)
class Size(Quantity):
"""
Represent size.
e.g.: 1.1GB, 300MB
"""
@classmethod
def parse_units(cls, value):
return value.strip(seps).upper()
class BitRate(Quantity):
"""
Represent bit rate.
e.g.: 320Kbps, 1.5Mbps
"""
@classmethod
def parse_units(cls, value):
value = value.strip(seps).capitalize()
for token in ('bits', 'bit'):
value = value.replace(token, 'bps')
return value
class FrameRate(Quantity):
"""
Represent frame rate.
e.g.: 24fps, 60fps
"""
@classmethod
def parse_units(cls, value):
return 'fps'

View file

@ -5,7 +5,7 @@ Words utils
"""
from collections import namedtuple
from guessit.rules.common import seps
from . import seps
_Word = namedtuple('_Word', ['span', 'value'])
@ -32,46 +32,3 @@ def iter_words(string):
i += 1
if inside_word:
yield _Word(span=(last_sep_index+1, i), value=string[last_sep_index+1:i])
# list of common words which could be interpreted as properties, but which
# are far too common to be able to say they represent a property in the
# middle of a string (where they most likely carry their commmon meaning)
COMMON_WORDS = frozenset([
# english words
'is', 'it', 'am', 'mad', 'men', 'man', 'run', 'sin', 'st', 'to',
'no', 'non', 'war', 'min', 'new', 'car', 'day', 'bad', 'bat', 'fan',
'fry', 'cop', 'zen', 'gay', 'fat', 'one', 'cherokee', 'got', 'an', 'as',
'cat', 'her', 'be', 'hat', 'sun', 'may', 'my', 'mr', 'rum', 'pi', 'bb',
'bt', 'tv', 'aw', 'by', 'md', 'mp', 'cd', 'lt', 'gt', 'in', 'ad', 'ice',
'ay', 'at', 'star', 'so', 'he', 'do', 'ax', 'mx',
# french words
'bas', 'de', 'le', 'son', 'ne', 'ca', 'ce', 'et', 'que',
'mal', 'est', 'vol', 'or', 'mon', 'se', 'je', 'tu', 'me',
'ne', 'ma', 'va', 'au', 'lu',
# japanese words,
'wa', 'ga', 'ao',
# spanish words
'la', 'el', 'del', 'por', 'mar', 'al',
# other
'ind', 'arw', 'ts', 'ii', 'bin', 'chan', 'ss', 'san', 'oss', 'iii',
'vi', 'ben', 'da', 'lt', 'ch', 'sr', 'ps', 'cx', 'vo',
# new from babelfish
'mkv', 'avi', 'dmd', 'the', 'dis', 'cut', 'stv', 'des', 'dia', 'and',
'cab', 'sub', 'mia', 'rim', 'las', 'une', 'par', 'srt', 'ano', 'toy',
'job', 'gag', 'reel', 'www', 'for', 'ayu', 'csi', 'ren', 'moi', 'sur',
'fer', 'fun', 'two', 'big', 'psy', 'air',
# movie title
'brazil', 'jordan',
# release groups
'bs', # Bosnian
'kz',
# countries
'gt', 'lt', 'im',
# part/pt
'pt',
# screener
'scr',
# quality
'sd', 'hr'
])

View file

@ -6,17 +6,20 @@ Groups markers (...), [...] and {...}
from rebulk import Rebulk
def groups():
def groups(config):
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk()
rebulk.defaults(name="group", marker=True)
starting = '([{'
ending = ')]}'
starting = config['starting']
ending = config['ending']
def mark_groups(input_string):
"""

View file

@ -8,9 +8,12 @@ from rebulk import Rebulk
from rebulk.utils import find_all
def path():
def path(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
@ -22,6 +25,7 @@ def path():
Functional pattern to mark path elements.
:param input_string:
:param context:
:return:
"""
ret = []

View file

@ -9,19 +9,17 @@ import copy
import six
from rebulk import Rebulk, Rule, CustomRule, POST_PROCESS, PRE_PROCESS, AppendMatch, RemoveMatch
from guessit.rules.common.words import iter_words
from .common import seps_no_groups
from .common.formatters import cleanup
from .common.comparators import marker_sorted
from .common.date import valid_year
from .common.words import iter_words
class EnlargeGroupMatches(CustomRule):
"""
Enlarge matches that start and/or end a group to include the brackets in their span.
:param matches:
:type matches:
:return:
:rtype:
"""
priority = PRE_PROCESS
@ -36,8 +34,7 @@ class EnlargeGroupMatches(CustomRule):
for match in matches.ending(group.end - 1):
ending.append(match)
if starting or ending:
return starting, ending
return starting, ending
def then(self, matches, when_response, context):
starting, ending = when_response
@ -89,21 +86,27 @@ class EquivalentHoles(Rule):
class RemoveAmbiguous(Rule):
"""
If multiple match are found with same name and different values, keep the one in the most valuable filepart.
If multiple matches are found with same name and different values, keep the one in the most valuable filepart.
Also keep other matches with the same name and values as the ones kept.
"""
priority = POST_PROCESS
consequence = RemoveMatch
def __init__(self, sort_function=marker_sorted, predicate=None):
super(RemoveAmbiguous, self).__init__()
self.sort_function = sort_function
self.predicate = predicate
def when(self, matches, context):
fileparts = marker_sorted(matches.markers.named('path'), matches)
fileparts = self.sort_function(matches.markers.named('path'), matches)
previous_fileparts_names = set()
values = defaultdict(list)
to_remove = []
for filepart in fileparts:
filepart_matches = matches.range(filepart.start, filepart.end)
filepart_matches = matches.range(filepart.start, filepart.end, predicate=self.predicate)
filepart_names = set()
for match in filepart_matches:
@ -120,6 +123,19 @@ class RemoveAmbiguous(Rule):
return to_remove
class RemoveLessSpecificSeasonEpisode(RemoveAmbiguous):
"""
If multiple season/episodes matches are found with different values,
keep the one tagged as 'SxxExx' or in the rightmost filepart.
"""
def __init__(self, name):
super(RemoveLessSpecificSeasonEpisode, self).__init__(
sort_function=(lambda markers, matches:
marker_sorted(list(reversed(markers)), matches,
lambda match: match.name == name and 'SxxExx' in match.tags)),
predicate=lambda match: match.name == name)
def _preferred_string(value1, value2): # pylint:disable=too-many-return-statements
"""
Retrieves preferred title from both values.
@ -176,6 +192,23 @@ class SeasonYear(Rule):
return ret
class YearSeason(Rule):
"""
If a year is found, no season is found, and an episode is found, create a season match from the year.
"""
priority = POST_PROCESS
consequence = AppendMatch
def when(self, matches, context):
ret = []
if not matches.named('season') and matches.named('episode'):
for year in matches.named('year'):
season = copy.copy(year)
season.name = 'season'
ret.append(season)
return ret
class Processors(CustomRule):
"""
Empty rule for ordering post_processing properly.
@ -189,10 +222,36 @@ class Processors(CustomRule):
pass
def processors():
class StripSeparators(CustomRule):
"""
Strip separators from matches. Keep separators if they are from acronyms, like in ".S.H.I.E.L.D."
"""
priority = POST_PROCESS
def when(self, matches, context):
return matches
def then(self, matches, when_response, context): # pragma: no cover
for match in matches:
for _ in range(0, len(match.span)):
if match.raw[0] in seps_no_groups and (len(match.raw) < 3 or match.raw[2] not in seps_no_groups):
match.raw_start += 1
for _ in reversed(range(0, len(match.span))):
if match.raw[-1] in seps_no_groups and (len(match.raw) < 3 or match.raw[-3] not in seps_no_groups):
match.raw_end -= 1
def processors(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
return Rebulk().rules(EnlargeGroupMatches, EquivalentHoles, RemoveAmbiguous, SeasonYear, Processors)
return Rebulk().rules(EnlargeGroupMatches, EquivalentHoles,
RemoveLessSpecificSeasonEpisode('season'),
RemoveLessSpecificSeasonEpisode('episode'),
RemoveAmbiguous, SeasonYear, YearSeason, Processors, StripSeparators)

View file

@ -6,15 +6,20 @@ audio_codec, audio_profile and audio_channels property
from rebulk.remodule import re
from rebulk import Rebulk, Rule, RemoveMatch
from ..common import dash
from ..common.pattern import is_disabled
from ..common.validators import seps_before, seps_after
audio_properties = ['audio_codec', 'audio_profile', 'audio_channels']
def audio_codec():
def audio_codec(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
@ -36,34 +41,49 @@ def audio_codec():
return match1
return '__default__'
rebulk.defaults(name="audio_codec", conflict_solver=audio_codec_priority)
rebulk.defaults(name='audio_codec',
conflict_solver=audio_codec_priority,
disabled=lambda context: is_disabled(context, 'audio_codec'))
rebulk.regex("MP3", "LAME", r"LAME(?:\d)+-?(?:\d)+", value="MP3")
rebulk.regex("Dolby", "DolbyDigital", "Dolby-Digital", "DD", value="DolbyDigital")
rebulk.regex("DolbyAtmos", "Dolby-Atmos", "Atmos", value="DolbyAtmos")
rebulk.regex("AAC", value="AAC")
rebulk.regex("AC3D?", value="AC3")
rebulk.regex("Flac", value="FLAC")
rebulk.regex("DTS", value="DTS")
rebulk.regex("True-?HD", value="TrueHD")
rebulk.string("MP2", value="MP2")
rebulk.regex('Dolby', 'DolbyDigital', 'Dolby-Digital', 'DD', 'AC3D?', value='Dolby Digital')
rebulk.regex('Dolby-?Atmos', 'Atmos', value='Dolby Atmos')
rebulk.string("AAC", value="AAC")
rebulk.string('EAC3', 'DDP', 'DD+', value='Dolby Digital Plus')
rebulk.string("Flac", value="FLAC")
rebulk.string("DTS", value="DTS")
rebulk.regex('DTS-?HD', 'DTS(?=-?MA)', value='DTS-HD',
conflict_solver=lambda match, other: other if other.name == 'audio_codec' else '__default__')
rebulk.regex('True-?HD', value='Dolby TrueHD')
rebulk.string('Opus', value='Opus')
rebulk.string('Vorbis', value='Vorbis')
rebulk.string('PCM', value='PCM')
rebulk.string('LPCM', value='LPCM')
rebulk.defaults(name="audio_profile")
rebulk.string("HD", value="HD", tags="DTS")
rebulk.regex("HD-?MA", value="HDMA", tags="DTS")
rebulk.string("HE", value="HE", tags="AAC")
rebulk.string("LC", value="LC", tags="AAC")
rebulk.string("HQ", value="HQ", tags="AC3")
rebulk.defaults(name='audio_profile', disabled=lambda context: is_disabled(context, 'audio_profile'))
rebulk.string('MA', value='Master Audio', tags=['audio_profile.rule', 'DTS-HD'])
rebulk.string('HR', 'HRA', value='High Resolution Audio', tags=['audio_profile.rule', 'DTS-HD'])
rebulk.string('ES', value='Extended Surround', tags=['audio_profile.rule', 'DTS'])
rebulk.string('HE', value='High Efficiency', tags=['audio_profile.rule', 'AAC'])
rebulk.string('LC', value='Low Complexity', tags=['audio_profile.rule', 'AAC'])
rebulk.string('HQ', value='High Quality', tags=['audio_profile.rule', 'Dolby Digital'])
rebulk.string('EX', value='EX', tags=['audio_profile.rule', 'Dolby Digital'])
rebulk.defaults(name="audio_channels")
rebulk.regex(r'(7[\W_][01](?:ch)?)(?:[^\d]|$)', value='7.1', children=True)
rebulk.regex(r'(5[\W_][01](?:ch)?)(?:[^\d]|$)', value='5.1', children=True)
rebulk.regex(r'(2[\W_]0(?:ch)?)(?:[^\d]|$)', value='2.0', children=True)
rebulk.defaults(name="audio_channels", disabled=lambda context: is_disabled(context, 'audio_channels'))
rebulk.regex(r'(7[\W_][01](?:ch)?)(?=[^\d]|$)', value='7.1', children=True)
rebulk.regex(r'(5[\W_][01](?:ch)?)(?=[^\d]|$)', value='5.1', children=True)
rebulk.regex(r'(2[\W_]0(?:ch)?)(?=[^\d]|$)', value='2.0', children=True)
rebulk.regex('7[01]', value='7.1', validator=seps_after, tags='weak-audio_channels')
rebulk.regex('5[01]', value='5.1', validator=seps_after, tags='weak-audio_channels')
rebulk.string('20', value='2.0', validator=seps_after, tags='weak-audio_channels')
rebulk.string('7ch', '8ch', value='7.1')
rebulk.string('5ch', '6ch', value='5.1')
rebulk.string('2ch', 'stereo', value='2.0')
rebulk.string('1ch', 'mono', value='1.0')
rebulk.rules(DtsRule, AacRule, Ac3Rule, AudioValidatorRule, HqConflictRule)
rebulk.rules(DtsHDRule, DtsRule, AacRule, DolbyDigitalRule, AudioValidatorRule, HqConflictRule,
AudioChannelsValidatorRule)
return rebulk
@ -108,25 +128,49 @@ class AudioProfileRule(Rule):
super(AudioProfileRule, self).__init__()
self.codec = codec
def enabled(self, context):
return not is_disabled(context, 'audio_profile')
def when(self, matches, context):
profile_list = matches.named('audio_profile', lambda match: self.codec in match.tags)
profile_list = matches.named('audio_profile',
lambda match: 'audio_profile.rule' in match.tags and
self.codec in match.tags)
ret = []
for profile in profile_list:
codec = matches.previous(profile, lambda match: match.name == 'audio_codec' and match.value == self.codec)
codec = matches.at_span(profile.span,
lambda match: match.name == 'audio_codec' and
match.value == self.codec, 0)
if not codec:
codec = matches.next(profile, lambda match: match.name == 'audio_codec' and match.value == self.codec)
codec = matches.previous(profile,
lambda match: match.name == 'audio_codec' and
match.value == self.codec)
if not codec:
codec = matches.next(profile,
lambda match: match.name == 'audio_codec' and
match.value == self.codec)
if not codec:
ret.append(profile)
if codec:
ret.extend(matches.conflicting(profile))
return ret
class DtsHDRule(AudioProfileRule):
"""
Rule to validate DTS-HD profile
"""
def __init__(self):
super(DtsHDRule, self).__init__('DTS-HD')
class DtsRule(AudioProfileRule):
"""
Rule to validate DTS profile
"""
def __init__(self):
super(DtsRule, self).__init__("DTS")
super(DtsRule, self).__init__('DTS')
class AacRule(AudioProfileRule):
@ -135,16 +179,16 @@ class AacRule(AudioProfileRule):
"""
def __init__(self):
super(AacRule, self).__init__("AAC")
super(AacRule, self).__init__('AAC')
class Ac3Rule(AudioProfileRule):
class DolbyDigitalRule(AudioProfileRule):
"""
Rule to validate AC3 profile
Rule to validate Dolby Digital profile
"""
def __init__(self):
super(Ac3Rule, self).__init__("AC3")
super(DolbyDigitalRule, self).__init__('Dolby Digital')
class HqConflictRule(Rule):
@ -152,13 +196,35 @@ class HqConflictRule(Rule):
Solve conflict between HQ from other property and from audio_profile.
"""
dependency = [DtsRule, AacRule, Ac3Rule]
dependency = [DtsHDRule, DtsRule, AacRule, DolbyDigitalRule]
consequence = RemoveMatch
def when(self, matches, context):
hq_audio = matches.named('audio_profile', lambda match: match.value == 'HQ')
hq_audio_spans = [match.span for match in hq_audio]
hq_other = matches.named('other', lambda match: match.span in hq_audio_spans)
def enabled(self, context):
return not is_disabled(context, 'audio_profile')
if hq_other:
return hq_other
def when(self, matches, context):
hq_audio = matches.named('audio_profile', lambda m: m.value == 'High Quality')
hq_audio_spans = [match.span for match in hq_audio]
return matches.named('other', lambda m: m.span in hq_audio_spans)
class AudioChannelsValidatorRule(Rule):
"""
Remove an audio_channels match if there is no audio codec as the previous match.
"""
priority = 128
consequence = RemoveMatch
def enabled(self, context):
return not is_disabled(context, 'audio_channels')
def when(self, matches, context):
ret = []
for audio_channel in matches.tagged('weak-audio_channels'):
valid_before = matches.range(audio_channel.start - 1, audio_channel.start,
lambda match: match.name == 'audio_codec')
if not valid_before:
ret.append(audio_channel)
return ret
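The net effect of the renamed codec values, as a hedged sketch (the exact output depends on the full rule set, so treat it as likely rather than guaranteed):

from guessit import guessit

info = guessit('Show.S01E01.1080p.WEB-DL.DDP5.1.H.264-GRP.mkv')
# Likely: info['audio_codec'] == 'Dolby Digital Plus'
#         info['audio_channels'] == '5.1'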

View file

@ -0,0 +1,72 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
video_bit_rate and audio_bit_rate properties
"""
import re
from rebulk import Rebulk
from rebulk.rules import Rule, RemoveMatch, RenameMatch
from ..common import dash, seps
from ..common.pattern import is_disabled
from ..common.quantity import BitRate
from ..common.validators import seps_surround
def bit_rate(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk(disabled=lambda context: (is_disabled(context, 'audio_bit_rate')
and is_disabled(context, 'video_bit_rate')))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
rebulk.defaults(name='audio_bit_rate', validator=seps_surround)
rebulk.regex(r'\d+-?[kmg]b(ps|its?)', r'\d+\.\d+-?[kmg]b(ps|its?)',
conflict_solver=(
lambda match, other: match
if other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags
else other
),
formatter=BitRate.fromstring, tags=['release-group-prefix'])
rebulk.rules(BitRateTypeRule)
return rebulk
class BitRateTypeRule(Rule):
"""
Convert audio bit rate guess into video bit rate.
"""
consequence = [RenameMatch('video_bit_rate'), RemoveMatch]
def when(self, matches, context):
to_rename = []
to_remove = []
if is_disabled(context, 'audio_bit_rate'):
to_remove.extend(matches.named('audio_bit_rate'))
else:
video_bit_rate_disabled = is_disabled(context, 'video_bit_rate')
for match in matches.named('audio_bit_rate'):
previous = matches.previous(match, index=0,
predicate=lambda m: m.name in ('source', 'screen_size', 'video_codec'))
if previous and not matches.holes(previous.end, match.start, predicate=lambda m: m.value.strip(seps)):
after = matches.next(match, index=0, predicate=lambda m: m.name == 'audio_codec')
if after and not matches.holes(match.end, after.start, predicate=lambda m: m.value.strip(seps)):
bitrate = match.value
if bitrate.units == 'Kbps' or (bitrate.units == 'Mbps' and bitrate.magnitude < 10):
continue
if video_bit_rate_disabled:
to_remove.append(match)
else:
to_rename.append(match)
return to_rename, to_remove
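Note: a minimal sketch of how BitRateTypeRule is expected to behave; the filenames are illustrative and the outputs are inferred from the rule logic above, not from this commit:
from guessit import guessit
# A rate next to the audio codec stays audio_bit_rate.
print(guessit('Movie.2018.1080p.BluRay.DTS.768kbps-GRP.mkv').get('audio_bit_rate'))
# A rate of 10 Mbps or more sitting between the source/resolution and the
# audio codec should be renamed to video_bit_rate.
print(guessit('Movie.2018.1080p.BluRay.15Mbps.DTS-GRP.mkv').get('video_bit_rate'))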

View file

@ -9,21 +9,26 @@ from rebulk import Rebulk, AppendMatch, Rule
from .title import TitleFromPosition
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import seps_surround
def bonus():
def bonus(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE)
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'bonus'))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
rebulk.regex(r'x(\d+)', name='bonus', private_parent=True, children=True, formatter=int,
validator={'__parent__': lambda match: seps_surround},
conflict_solver=lambda match, conflicting: match
if conflicting.name in ['video_codec', 'episode'] and 'bonus-conflict' not in conflicting.tags
if conflicting.name in ('video_codec', 'episode') and 'weak-episode' not in conflicting.tags
else '__default__')
rebulk.rules(BonusTitleRule)
@ -40,7 +45,7 @@ class BonusTitleRule(Rule):
properties = {'bonus_title': [None]}
def when(self, matches, context):
def when(self, matches, context): # pylint:disable=inconsistent-return-statements
bonus_number = matches.named('bonus', lambda match: not match.private, index=0)
if bonus_number:
filepath = matches.markers.at_match(bonus_number, lambda marker: marker.name == 'path', 0)
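Note: an illustrative check of the bonus pattern and BonusTitleRule. A sketch only; the filename and the expected values are assumptions, not from this commit:
from guessit import guessit
# 'x02' should match as bonus number 2; BonusTitleRule then tries to
# read the bonus title from the text that follows the marker.
info = guessit('Movie.Name-x02-Making.Of.mkv')
print(info.get('bonus'), info.get('bonus_title'))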

View file

@ -6,16 +6,22 @@ cd and cd_count properties
from rebulk.remodule import re
from rebulk import Rebulk
from ..common import dash
from ..common.pattern import is_disabled
def cds():
def cds(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'cd'))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
rebulk.regex(r'cd-?(?P<cd>\d+)(?:-?of-?(?P<cd_count>\d+))?',
validator={'cd': lambda match: 0 < match.value < 100,

View file

@ -6,48 +6,55 @@ container property
from rebulk.remodule import re
from rebulk import Rebulk
from ..common import seps
from ..common.pattern import is_disabled
from ..common.validators import seps_surround
from ...reutils import build_or_pattern
def container():
def container(config):
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'container'))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
rebulk.defaults(name='container',
formatter=lambda value: value[1:],
formatter=lambda value: value.strip(seps),
tags=['extension'],
conflict_solver=lambda match, other: other
if other.name in ['format', 'video_codec'] or
if other.name in ('source', 'video_codec') or
other.name == 'container' and 'extension' not in other.tags
else '__default__')
subtitles = ['srt', 'idx', 'sub', 'ssa', 'ass']
info = ['nfo']
videos = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2',
'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm',
'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv',
'iso', 'vob']
torrent = ['torrent']
subtitles = config['subtitles']
info = config['info']
videos = config['videos']
torrent = config['torrent']
nzb = config['nzb']
rebulk.regex(r'\.'+build_or_pattern(subtitles)+'$', exts=subtitles, tags=['extension', 'subtitle'])
rebulk.regex(r'\.'+build_or_pattern(info)+'$', exts=info, tags=['extension', 'info'])
rebulk.regex(r'\.'+build_or_pattern(videos)+'$', exts=videos, tags=['extension', 'video'])
rebulk.regex(r'\.'+build_or_pattern(torrent)+'$', exts=torrent, tags=['extension', 'torrent'])
rebulk.regex(r'\.'+build_or_pattern(nzb)+'$', exts=nzb, tags=['extension', 'nzb'])
rebulk.defaults(name='container',
validator=seps_surround,
formatter=lambda s: s.upper(),
formatter=lambda s: s.lower(),
conflict_solver=lambda match, other: match
if other.name in ['format',
'video_codec'] or other.name == 'container' and 'extension' in other.tags
if other.name in ('source',
'video_codec') or other.name == 'container' and 'extension' in other.tags
else '__default__')
rebulk.string(*[sub for sub in subtitles if sub not in ['sub']], tags=['subtitle'])
rebulk.string(*[sub for sub in subtitles if sub not in ('sub', 'ass')], tags=['subtitle'])
rebulk.string(*videos, tags=['video'])
rebulk.string(*torrent, tags=['torrent'])
rebulk.string(*nzb, tags=['nzb'])
return rebulk
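Note: the user-visible change here is the formatter switch from upper() to lower() and the conflict solvers now referencing 'source'. A sketch with an illustrative filename:
from guessit import guessit
# The extension wins over same-named tokens elsewhere in the name, and
# word-form containers are now reported in lower case instead of upper case.
print(guessit('Movie.2018.1080p.x264.mkv').get('container'))  # 'mkv' expected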

View file

@ -7,41 +7,50 @@ country property
import babelfish
from rebulk import Rebulk
from ..common.words import COMMON_WORDS, iter_words
from ..common.pattern import is_disabled
from ..common.words import iter_words
def country():
def country(config, common_words):
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:param common_words: common words
:type common_words: set
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().defaults(name='country')
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'country'))
rebulk = rebulk.defaults(name='country')
def find_countries(string, context=None):
"""
Find countries in given string.
"""
allowed_countries = context.get('allowed_countries') if context else None
return CountryFinder(allowed_countries, common_words).find(string)
rebulk.functional(find_countries,
# Prefer language and any other property over country if not US or GB.
conflict_solver=lambda match, other: match
if other.name != 'language' or match.value not in [babelfish.Country('US'),
babelfish.Country('GB')]
if other.name != 'language' or match.value not in (babelfish.Country('US'),
babelfish.Country('GB'))
else other,
properties={'country': [None]})
properties={'country': [None]},
disabled=lambda context: not context.get('allowed_countries'))
babelfish.country_converters['guessit'] = GuessitCountryConverter(config['synonyms'])
return rebulk
COUNTRIES_SYN = {'ES': ['españa'],
'GB': ['UK'],
'BR': ['brazilian', 'bra'],
'CA': ['québec', 'quebec', 'qc'],
# FIXME: this one is a bit of a stretch, not sure how to do it properly, though...
'MX': ['Latinoamérica', 'latin america']}
class GuessitCountryConverter(babelfish.CountryReverseConverter): # pylint: disable=missing-docstring
def __init__(self):
def __init__(self, synonyms):
self.guessit_exceptions = {}
for alpha2, synlist in COUNTRIES_SYN.items():
for alpha2, synlist in synonyms.items():
for syn in synlist:
self.guessit_exceptions[syn.lower()] = alpha2
@ -56,7 +65,7 @@ class GuessitCountryConverter(babelfish.CountryReverseConverter): # pylint: dis
return 'UK'
return str(babelfish.Country(alpha2))
def reverse(self, name):
def reverse(self, name): # pylint:disable=arguments-differ
# exceptions come first, as they need to override a potential match
# with any of the other guessers
try:
@ -78,32 +87,28 @@ class GuessitCountryConverter(babelfish.CountryReverseConverter): # pylint: dis
raise babelfish.CountryReverseError(name)
babelfish.country_converters['guessit'] = GuessitCountryConverter()
class CountryFinder(object):
"""Helper class to search and return country matches."""
def __init__(self, allowed_countries, common_words):
self.allowed_countries = {l.lower() for l in allowed_countries or []}
self.common_words = common_words
def is_allowed_country(country_object, context=None):
"""
Check if country is allowed.
"""
if context and context.get('allowed_countries'):
allowed_countries = context.get('allowed_countries')
return country_object.name.lower() in allowed_countries or country_object.alpha2.lower() in allowed_countries
return True
def find(self, string):
"""Return all matches for country."""
for word_match in iter_words(string.strip().lower()):
word = word_match.value
if word.lower() in self.common_words:
continue
try:
country_object = babelfish.Country.fromguessit(word)
if (country_object.name.lower() in self.allowed_countries or
country_object.alpha2.lower() in self.allowed_countries):
yield self._to_rebulk_match(word_match, country_object)
except babelfish.Error:
continue
def find_countries(string, context=None):
"""
Find countries in given string.
"""
ret = []
for word_match in iter_words(string.strip().lower()):
word = word_match.value
if word.lower() in COMMON_WORDS:
continue
try:
country_object = babelfish.Country.fromguessit(word)
if is_allowed_country(country_object, context):
ret.append((word_match.span[0], word_match.span[1], {'value': country_object}))
except babelfish.Error:
continue
return ret
@classmethod
def _to_rebulk_match(cls, word, value):
return word.span[0], word.span[1], {'value': value}
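Note: country detection is now gated on the 'allowed_countries' option (see the disabled lambda above). A sketch with an illustrative filename; the expected results are inferred, not from this commit:
from guessit import guessit
name = 'Shameless.US.S01E01.720p.mkv'
# Without the option, the country pattern is disabled entirely.
print(guessit(name).get('country'))  # None expected
# With it, 'US' should resolve through babelfish to a Country object.
print(guessit(name, {'allowed_countries': ['us']}).get('country'))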

View file

@ -6,20 +6,25 @@ crc and uuid properties
from rebulk.remodule import re
from rebulk import Rebulk
from ..common.pattern import is_disabled
from ..common.validators import seps_surround
def crc():
def crc(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE)
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'crc32'))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
rebulk.defaults(validator=seps_surround)
rebulk.regex('(?:[a-fA-F]|[0-9]){8}', name='crc32',
conflict_solver=lambda match, other: match
conflict_solver=lambda match, other: other
if other.name in ['episode', 'season']
else '__default__')
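Note: the conflict solver was flipped so that crc32 now yields to episode/season instead of winning over them. A sketch (illustrative filename, values inferred):
from guessit import guessit
info = guessit('[Group] Show - 01 [ABCD1234].mkv')
# An 8-character hex group should be reported as crc32, while overlapping
# number conflicts are resolved in favour of episode/season.
print(info.get('crc32'), info.get('episode'))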

View file

@ -6,21 +6,29 @@ date and year properties
from rebulk import Rebulk, RemoveMatch, Rule
from ..common.date import search_date, valid_year
from ..common.pattern import is_disabled
from ..common.validators import seps_surround
def date():
def date(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().defaults(validator=seps_surround)
rebulk.regex(r"\d{4}", name="year", formatter=int,
disabled=lambda context: is_disabled(context, 'year'),
conflict_solver=lambda match, other: other
if other.name in ('episode', 'season') and len(other.raw) < len(match.raw)
else '__default__',
validator=lambda match: seps_surround(match) and valid_year(match.value))
def date_functional(string, context):
def date_functional(string, context): # pylint:disable=inconsistent-return-statements
"""
Search for date in the string and retrieves match
@ -33,8 +41,9 @@ def date():
return ret[0], ret[1], {'value': ret[2]}
rebulk.functional(date_functional, name="date", properties={'date': [None]},
disabled=lambda context: is_disabled(context, 'date'),
conflict_solver=lambda match, other: other
if other.name in ['episode', 'season']
if other.name in ('episode', 'season', 'crc32')
else '__default__')
rebulk.rules(KeepMarkedYearInFilepart)
@ -49,6 +58,9 @@ class KeepMarkedYearInFilepart(Rule):
priority = 64
consequence = RemoveMatch
def enabled(self, context):
return not is_disabled(context, 'year')
def when(self, matches, context):
ret = []
if len(matches.named('year')) > 1:

View file

@ -7,25 +7,46 @@ from rebulk.remodule import re
from rebulk import Rebulk
from ..common import dash
from ..common.pattern import is_disabled
from ..common.validators import seps_surround
def edition():
def edition(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'edition'))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
rebulk.defaults(name='edition', validator=seps_surround)
rebulk.regex('collector', 'collector-edition', 'edition-collector', value='Collector Edition')
rebulk.regex('special-edition', 'edition-special', value='Special Edition',
rebulk.regex('collector', "collector'?s?-edition", 'edition-collector', value='Collector')
rebulk.regex('special-edition', 'edition-special', value='Special',
conflict_solver=lambda match, other: other
if other.name == 'episode_details' and other.value == 'Special'
else '__default__')
rebulk.regex('criterion-edition', 'edition-criterion', value='Criterion Edition')
rebulk.regex('deluxe', 'deluxe-edition', 'edition-deluxe', value='Deluxe Edition')
rebulk.regex('director\'?s?-cut', 'director\'?s?-cut-edition', 'edition-director\'?s?-cut', value='Director\'s cut')
rebulk.string('se', value='Special', tags='has-neighbor')
rebulk.string('ddc', value="Director's Definitive Cut")
rebulk.regex('criterion-edition', 'edition-criterion', 'CC', value='Criterion')
rebulk.regex('deluxe', 'deluxe-edition', 'edition-deluxe', value='Deluxe')
rebulk.regex('limited', 'limited-edition', value='Limited', tags=['has-neighbor', 'release-group-prefix'])
rebulk.regex(r'theatrical-cut', r'theatrical-edition', r'theatrical', value='Theatrical')
rebulk.regex(r"director'?s?-cut", r"director'?s?-cut-edition", r"edition-director'?s?-cut", 'DC',
value="Director's Cut")
rebulk.regex('extended', 'extended-?cut', 'extended-?version',
value='Extended', tags=['has-neighbor', 'release-group-prefix'])
rebulk.regex('alternat(e|ive)(?:-?Cut)?', value='Alternative Cut', tags=['has-neighbor', 'release-group-prefix'])
for value in ('Remastered', 'Uncensored', 'Uncut', 'Unrated'):
rebulk.string(value, value=value, tags=['has-neighbor', 'release-group-prefix'])
rebulk.string('Festival', value='Festival', tags=['has-neighbor-before', 'has-neighbor-after'])
rebulk.regex('imax', 'imax-edition', value='IMAX')
rebulk.regex('fan-edit(?:ion)?', 'fan-collection', value='Fan')
rebulk.regex('ultimate-edition', value='Ultimate')
rebulk.regex("ultimate-collector'?s?-edition", value=['Ultimate', 'Collector'])
rebulk.regex('ultimate-fan-edit(?:ion)?', 'ultimate-fan-collection', value=['Ultimate', 'Fan'])
return rebulk
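Note: edition values lost their 'Edition' suffix, and some tokens can now carry two values at once. A sketch; the filenames are illustrative and the expected outputs are inferred from the patterns above:
from guessit import guessit
print(guessit('Movie.1982.Directors.Cut.1080p.BluRay.mkv').get('edition'))
# "Director's Cut" expected
print(guessit('Movie.2008.Ultimate.Collectors.Edition.mkv').get('edition'))
# ['Ultimate', 'Collector'] expected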

View file

@ -5,26 +5,92 @@ Episode title
"""
from collections import defaultdict
from rebulk import Rebulk, Rule, AppendMatch, RenameMatch
from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, RenameMatch, POST_PROCESS
from ..common import seps, title_seps
from ..properties.title import TitleFromPosition, TitleBaseRule
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..properties.title import TitleFromPosition, TitleBaseRule
from ..properties.type import TypeProcessor
def episode_title():
def episode_title(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().rules(EpisodeTitleFromPosition,
AlternativeTitleReplace,
TitleToEpisodeTitle,
Filepart3EpisodeTitle,
Filepart2EpisodeTitle)
previous_names = ('episode', 'episode_count',
'season', 'season_count', 'date', 'title', 'year')
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'episode_title'))
rebulk = rebulk.rules(RemoveConflictsWithEpisodeTitle(previous_names),
EpisodeTitleFromPosition(previous_names),
AlternativeTitleReplace(previous_names),
TitleToEpisodeTitle,
Filepart3EpisodeTitle,
Filepart2EpisodeTitle,
RenameEpisodeTitleWhenMovieType)
return rebulk
class RemoveConflictsWithEpisodeTitle(Rule):
"""
Remove conflicting matches that might lead to wrong episode_title parsing.
"""
priority = 64
consequence = RemoveMatch
def __init__(self, previous_names):
super(RemoveConflictsWithEpisodeTitle, self).__init__()
self.previous_names = previous_names
self.next_names = ('streaming_service', 'screen_size', 'source',
'video_codec', 'audio_codec', 'other', 'container')
self.affected_if_holes_after = ('part', )
self.affected_names = ('part', 'year')
def when(self, matches, context):
to_remove = []
for filepart in matches.markers.named('path'):
for match in matches.range(filepart.start, filepart.end,
predicate=lambda m: m.name in self.affected_names):
before = matches.range(filepart.start, match.start, predicate=lambda m: not m.private, index=-1)
if not before or before.name not in self.previous_names:
continue
after = matches.range(match.end, filepart.end, predicate=lambda m: not m.private, index=0)
if not after or after.name not in self.next_names:
continue
group = matches.markers.at_match(match, predicate=lambda m: m.name == 'group', index=0)
def has_value_in_same_group(current_match, current_group=group):
"""Return true if current match has value and belongs to the current group."""
return current_match.value.strip(seps) and (
current_group == matches.markers.at_match(current_match,
predicate=lambda mm: mm.name == 'group', index=0)
)
holes_before = matches.holes(before.end, match.start, predicate=has_value_in_same_group)
holes_after = matches.holes(match.end, after.start, predicate=has_value_in_same_group)
if not holes_before and not holes_after:
continue
if match.name in self.affected_if_holes_after and not holes_after:
continue
to_remove.append(match)
if match.parent:
to_remove.append(match.parent)
return to_remove
class TitleToEpisodeTitle(Rule):
"""
If multiple different titles are found, convert the one following the episode number to episode_title.
@ -33,24 +99,19 @@ class TitleToEpisodeTitle(Rule):
def when(self, matches, context):
titles = matches.named('title')
if len(titles) < 2:
return
title_groups = defaultdict(list)
for title in titles:
title_groups[title.value].append(title)
episode_titles = []
main_titles = []
if len(title_groups) < 2:
return episode_titles
for title in titles:
if matches.previous(title, lambda match: match.name == 'episode'):
episode_titles.append(title)
else:
main_titles.append(title)
if episode_titles:
return episode_titles
return episode_titles
def then(self, matches, when_response, context):
for title in when_response:
@ -66,12 +127,14 @@ class EpisodeTitleFromPosition(TitleBaseRule):
"""
dependency = TitleToEpisodeTitle
def __init__(self, previous_names):
super(EpisodeTitleFromPosition, self).__init__('episode_title', ['title'])
self.previous_names = previous_names
def hole_filter(self, hole, matches):
episode = matches.previous(hole,
lambda previous: any(name in previous.names
for name in ['episode', 'episode_details',
'episode_count', 'season', 'season_count',
'date', 'title', 'year']),
for name in self.previous_names),
0)
crc32 = matches.named('crc32')
@ -89,10 +152,7 @@ class EpisodeTitleFromPosition(TitleBaseRule):
return False
return super(EpisodeTitleFromPosition, self).should_remove(match, matches, filepart, hole, context)
def __init__(self):
super(EpisodeTitleFromPosition, self).__init__('episode_title', ['title'])
def when(self, matches, context):
def when(self, matches, context): # pylint:disable=inconsistent-return-statements
if matches.named('episode_title'):
return
return super(EpisodeTitleFromPosition, self).when(matches, context)
@ -105,7 +165,11 @@ class AlternativeTitleReplace(Rule):
dependency = EpisodeTitleFromPosition
consequence = RenameMatch
def when(self, matches, context):
def __init__(self, previous_names):
super(AlternativeTitleReplace, self).__init__()
self.previous_names = previous_names
def when(self, matches, context): # pylint:disable=inconsistent-return-statements
if matches.named('episode_title'):
return
@ -116,10 +180,7 @@ class AlternativeTitleReplace(Rule):
if main_title:
episode = matches.previous(main_title,
lambda previous: any(name in previous.names
for name in ['episode', 'episode_details',
'episode_count', 'season',
'season_count',
'date', 'title', 'year']),
for name in self.previous_names),
0)
crc32 = matches.named('crc32')
@ -130,9 +191,31 @@ class AlternativeTitleReplace(Rule):
def then(self, matches, when_response, context):
matches.remove(when_response)
when_response.name = 'episode_title'
when_response.tags.append('alternative-replaced')
matches.append(when_response)
class RenameEpisodeTitleWhenMovieType(Rule):
"""
Rename episode_title by alternative_title when type is movie.
"""
priority = POST_PROCESS
dependency = TypeProcessor
consequence = RenameMatch
def when(self, matches, context): # pylint:disable=inconsistent-return-statements
if matches.named('episode_title', lambda m: 'alternative-replaced' not in m.tags) \
and not matches.named('type', lambda m: m.value == 'episode'):
return matches.named('episode_title')
def then(self, matches, when_response, context):
for match in when_response:
matches.remove(match)
match.name = 'alternative_title'
matches.append(match)
class Filepart3EpisodeTitle(Rule):
"""
If we have at least 3 filepart structured like this:
@ -140,12 +223,18 @@ class Filepart3EpisodeTitle(Rule):
Serie name/S01/E01-episode_title.mkv
AAAAAAAAAA/BBB/CCCCCCCCCCCCCCCCCCCC
Serie name/S01/episode_title-E01.mkv
AAAAAAAAAA/BBB/CCCCCCCCCCCCCCCCCCCC
If CCCC contains episode and BBB contains seasonNumber
Then title is to be found in AAAA.
"""
consequence = AppendMatch('title')
def when(self, matches, context):
def when(self, matches, context): # pylint:disable=inconsistent-return-statements
if matches.tagged('filepart-title'):
return
fileparts = matches.markers.named('path')
if len(fileparts) < 3:
return
@ -160,6 +249,7 @@ class Filepart3EpisodeTitle(Rule):
if season:
hole = matches.holes(subdirectory.start, subdirectory.end,
ignore=lambda match: 'weak-episode' in match.tags,
formatter=cleanup, seps=title_seps, predicate=lambda match: match.value,
index=0)
if hole:
@ -174,11 +264,22 @@ class Filepart2EpisodeTitle(Rule):
AAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBB
If BBBB contains episode and AAA contains a hole followed by seasonNumber
Then title is to be found in AAAA.
then title is to be found in AAAA.
or
Serie name/S01E01-episode_title.mkv
AAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBB
If BBBB contains season and episode and AAA contains a hole
then title is to be found in AAAA.
"""
consequence = AppendMatch('title')
def when(self, matches, context):
def when(self, matches, context): # pylint:disable=inconsistent-return-statements
if matches.tagged('filepart-title'):
return
fileparts = matches.markers.named('path')
if len(fileparts) < 2:
return
@ -188,9 +289,12 @@ class Filepart2EpisodeTitle(Rule):
episode_number = matches.range(filename.start, filename.end, lambda match: match.name == 'episode', 0)
if episode_number:
season = matches.range(directory.start, directory.end, lambda match: match.name == 'season', 0)
season = (matches.range(directory.start, directory.end, lambda match: match.name == 'season', 0) or
matches.range(filename.start, filename.end, lambda match: match.name == 'season', 0))
if season:
hole = matches.holes(directory.start, directory.end, formatter=cleanup, seps=title_seps,
hole = matches.holes(directory.start, directory.end, ignore=lambda match: 'weak-episode' in match.tags,
formatter=cleanup, seps=title_seps,
predicate=lambda match: match.value, index=0)
if hole:
hole.tags.append('filepart-title')
return hole
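Note: the Filepart3EpisodeTitle docstring above can be exercised directly. A sketch; the path is illustrative and the expected split is inferred from the rule, not from this commit:
from guessit import guessit
info = guessit('Serie Name/S01/E01-Episode Title.mkv')
print(info.get('title'), info.get('season'),
      info.get('episode'), info.get('episode_title'))
# 'Serie Name' 1 1 'Episode Title' expected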

View file

@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
episode, season, episode_count, season_count and episode_details properties
episode, season, disc, episode_count, season_count and episode_details properties
"""
import copy
from collections import defaultdict
@ -12,23 +12,52 @@ from rebulk.remodule import re
from rebulk.utils import is_iterable
from .title import TitleFromPosition
from ..common import dash, alt_dash, seps
from ..common import dash, alt_dash, seps, seps_no_fs
from ..common.formatters import strip
from ..common.numeral import numeral, parse_numeral
from ..common.pattern import is_disabled
from ..common.validators import compose, seps_surround, seps_before, int_coercable
from ...reutils import build_or_pattern
def episodes():
def episodes(config):
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
# pylint: disable=too-many-branches,too-many-statements,too-many-locals
rebulk = Rebulk()
rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])
def is_season_episode_disabled(context):
"""Whether season and episode rules should be enabled."""
return is_disabled(context, 'episode') or is_disabled(context, 'season')
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'])
episode_max_range = config['episode_max_range']
season_max_range = config['season_max_range']
def episodes_season_chain_breaker(matches):
"""
Break chains if the offset between two neighboring values exceeds the configured episode_max_range / season_max_range.
:param matches:
:type matches:
:return:
:rtype:
"""
eps = matches.named('episode')
if len(eps) > 1 and abs(eps[-1].value - eps[-2].value) > episode_max_range:
return True
seasons = matches.named('season')
if len(seasons) > 1 and abs(seasons[-1].value - seasons[-2].value) > season_max_range:
return True
return False
rebulk.chain_defaults(chain_breaker=episodes_season_chain_breaker)
def season_episode_conflict_solver(match, other):
"""
@ -38,38 +67,41 @@ def episodes():
:param other:
:return:
"""
if match.name in ['season', 'episode'] and other.name in ['screen_size', 'video_codec',
'audio_codec', 'audio_channels',
'container', 'date']:
return match
elif match.name in ['season', 'episode'] and other.name in ['season', 'episode'] \
and match.initiator != other.initiator:
if 'weak-episode' in match.tags:
if match.name != other.name:
if match.name == 'episode' and other.name == 'year':
return match
if 'weak-episode' in other.tags:
return other
if 'x' in match.initiator.raw.lower():
return match
if 'x' in other.initiator.raw.lower():
return other
if match.name in ('season', 'episode'):
if other.name in ('video_codec', 'audio_codec', 'container', 'date'):
return match
if (other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags
and not match.initiator.children.named(match.name + 'Marker')) or (
other.name == 'screen_size' and not int_coercable(other.raw)):
return match
if other.name in ('season', 'episode') and match.initiator != other.initiator:
if (match.initiator.name in ('weak_episode', 'weak_duplicate')
and other.initiator.name in ('weak_episode', 'weak_duplicate')):
return '__default__'
for current in (match, other):
if 'weak-episode' in current.tags or 'x' in current.initiator.raw.lower():
return current
return '__default__'
season_episode_seps = []
season_episode_seps.extend(seps)
season_episode_seps.extend(['x', 'X', 'e', 'E'])
season_words = ['season', 'saison', 'serie', 'seasons', 'saisons', 'series']
episode_words = ['episode', 'episodes', 'ep']
of_words = ['of', 'sur']
all_words = ['All']
season_markers = ["S"]
season_ep_markers = ["x"]
episode_markers = ["xE", "Ex", "EP", "E", "x"]
range_separators = ['-', '~', 'to', 'a']
weak_discrete_separators = list(sep for sep in seps if sep not in range_separators)
strong_discrete_separators = ['+', '&', 'and', 'et']
season_words = config['season_words']
episode_words = config['episode_words']
of_words = config['of_words']
all_words = config['all_words']
season_markers = config['season_markers']
season_ep_markers = config['season_ep_markers']
disc_markers = config['disc_markers']
episode_markers = config['episode_markers']
range_separators = config['range_separators']
weak_discrete_separators = list(sep for sep in seps_no_fs if sep not in range_separators)
strong_discrete_separators = config['discrete_separators']
discrete_separators = strong_discrete_separators + weak_discrete_separators
max_range_gap = config['max_range_gap']
def ordering_validator(match):
"""
Validator for season list. They should be in natural order to be validated.
@ -77,7 +109,7 @@ def episodes():
episode/season separated by a weak discrete separator should be consecutive, unless a strong discrete separator
or a range separator is present in the chain (1.3&5 is valid, but 1.3-5 is not valid and 1.3.5 is not valid)
"""
values = match.children.to_dict(implicit=True)
values = match.children.to_dict()
if 'season' in values and is_iterable(values['season']):
# Season numbers must be in natural order to be validated.
if not list(sorted(values['season'])) == values['season']:
@ -104,7 +136,7 @@ def episodes():
separator = match.children.previous(current_match,
lambda m: m.name == property_name + 'Separator', 0)
if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
if not current_match.value - previous_match.value == 1:
if not 0 < current_match.value - previous_match.value <= max_range_gap + 1:
valid = False
if separator.raw in strong_discrete_separators:
valid = True
@ -122,24 +154,25 @@ def episodes():
private_parent=True,
validate_all=True,
validator={'__parent__': ordering_validator},
conflict_solver=season_episode_conflict_solver) \
.regex(build_or_pattern(season_markers) + r'(?P<season>\d+)@?' +
build_or_pattern(episode_markers) + r'@?(?P<episode>\d+)',
conflict_solver=season_episode_conflict_solver,
disabled=is_season_episode_disabled) \
.regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)@?' +
build_or_pattern(episode_markers + disc_markers, name='episodeMarker') + r'@?(?P<episode>\d+)',
validate_all=True,
validator={'__parent__': seps_before}).repeater('+') \
.regex(build_or_pattern(episode_markers + discrete_separators + range_separators,
.regex(build_or_pattern(episode_markers + disc_markers + discrete_separators + range_separators,
name='episodeSeparator',
escape=True) +
r'(?P<episode>\d+)').repeater('*') \
.chain() \
.regex(r'(?P<season>\d+)@?' +
build_or_pattern(season_ep_markers) +
build_or_pattern(season_ep_markers, name='episodeMarker') +
r'@?(?P<episode>\d+)',
validate_all=True,
validator={'__parent__': seps_before}) \
.chain() \
.regex(r'(?P<season>\d+)@?' +
build_or_pattern(season_ep_markers) +
build_or_pattern(season_ep_markers, name='episodeMarker') +
r'@?(?P<episode>\d+)',
validate_all=True,
validator={'__parent__': seps_before}) \
@ -148,7 +181,7 @@ def episodes():
escape=True) +
r'(?P<episode>\d+)').repeater('*') \
.chain() \
.regex(build_or_pattern(season_markers) + r'(?P<season>\d+)',
.regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)',
validate_all=True,
validator={'__parent__': seps_before}) \
.regex(build_or_pattern(season_markers + discrete_separators + range_separators,
@ -157,12 +190,9 @@ def episodes():
r'(?P<season>\d+)').repeater('*')
# episode_details property
for episode_detail in ('Special', 'Bonus', 'Omake', 'Ova', 'Oav', 'Pilot', 'Unaired'):
rebulk.string(episode_detail, value=episode_detail, name='episode_details')
rebulk.regex(r'Extras?', name='episode_details', value='Extras')
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'],
validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True)
for episode_detail in ('Special', 'Pilot', 'Unaired', 'Final'):
rebulk.string(episode_detail, value=episode_detail, name='episode_details',
disabled=lambda context: is_disabled(context, 'episode_details'))
def validate_roman(match):
"""
@ -176,121 +206,219 @@ def episodes():
return True
return seps_surround(match)
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True,
conflict_solver=season_episode_conflict_solver)
rebulk.chain(abbreviations=[alt_dash],
formatter={'season': parse_numeral, 'count': parse_numeral},
validator={'__parent__': compose(seps_surround, ordering_validator),
'season': validate_roman,
'count': validate_roman}) \
'count': validate_roman},
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'season')) \
.defaults(validator=None) \
.regex(build_or_pattern(season_words) + '@?(?P<season>' + numeral + ')') \
.regex(build_or_pattern(season_words, name='seasonMarker') + '@?(?P<season>' + numeral + ')') \
.regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
.regex(r'@?(?P<seasonSeparator>' +
build_or_pattern(range_separators + discrete_separators + ['@'], escape=True) +
r')@?(?P<season>\d+)').repeater('*')
.regex(r'@?' + build_or_pattern(range_separators + discrete_separators + ['@'],
name='seasonSeparator', escape=True) +
r'@?(?P<season>\d+)').repeater('*')
rebulk.regex(build_or_pattern(episode_words) + r'-?(?P<episode>\d+)' +
rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>\d+)' +
r'(?:v(?P<version>\d+))?' +
r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?', # Episode 4
abbreviations=[dash], formatter=int,
disabled=lambda context: context.get('type') == 'episode')
abbreviations=[dash], formatter={'episode': int, 'version': int, 'count': int},
disabled=lambda context: context.get('type') == 'episode' or is_disabled(context, 'episode'))
rebulk.regex(build_or_pattern(episode_words) + r'-?(?P<episode>' + numeral + ')' +
rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>' + numeral + ')' +
r'(?:v(?P<version>\d+))?' +
r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?', # Episode 4
abbreviations=[dash],
validator={'episode': validate_roman},
formatter={'episode': parse_numeral, 'version': int, 'count': int},
disabled=lambda context: context.get('type') != 'episode')
disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode'))
rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')',
tags=['SxxExx'],
abbreviations=[dash],
validator=None,
formatter={'season': int, 'other': lambda match: 'Complete'})
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
validator={'__parent__': seps_surround}, children=True, private_parent=True)
formatter={'season': int, 'other': lambda match: 'Complete'},
disabled=lambda context: is_disabled(context, 'season'))
# 12, 13
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int}) \
rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int},
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
.defaults(validator=None) \
.regex(r'(?P<episode>\d{2})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*')
# 012, 013
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int}) \
rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int},
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
.defaults(validator=None) \
.regex(r'0(?P<episode>\d{1,2})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})').repeater('*')
# 112, 113
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int},
disabled=lambda context: not context.get('episode_prefer_number', False)) \
rebulk.chain(tags=['weak-episode'],
formatter={'episode': int, 'version': int},
name='weak_episode',
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
.defaults(validator=None) \
.regex(r'(?P<episode>\d{3,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})').repeater('*')
# 1, 2, 3
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int},
disabled=lambda context: context.get('type') != 'episode') \
rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int},
disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode')) \
.defaults(validator=None) \
.regex(r'(?P<episode>\d)') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})').repeater('*')
# e112, e113
# e112, e113, 1e18, 3e19
# TODO: Enhance rebulk for validator to be used globally (season_episode_validator)
rebulk.chain(formatter={'episode': int, 'version': int}) \
rebulk.chain(formatter={'season': int, 'episode': int, 'version': int},
disabled=lambda context: is_disabled(context, 'episode')) \
.defaults(validator=None) \
.regex(r'e(?P<episode>\d{1,4})') \
.regex(r'(?P<season>\d{1,2})?(?P<episodeMarker>e)(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')
# ep 112, ep113, ep112, ep113
rebulk.chain(abbreviations=[dash], formatter={'episode': int, 'version': int}) \
rebulk.chain(abbreviations=[dash], formatter={'episode': int, 'version': int},
disabled=lambda context: is_disabled(context, 'episode')) \
.defaults(validator=None) \
.regex(r'ep-?(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})').repeater('*')
# cap 112, cap 112_114
rebulk.chain(abbreviations=[dash],
tags=['see-pattern'],
formatter={'season': int, 'episode': int},
disabled=is_season_episode_disabled) \
.defaults(validator=None) \
.regex(r'(?P<seasonMarker>cap)-?(?P<season>\d{1,2})(?P<episode>\d{2})') \
.regex(r'(?P<episodeSeparator>-)(?P<season>\d{1,2})(?P<episode>\d{2})').repeater('?')
# 102, 0102
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode', 'weak-duplicate'],
rebulk.chain(tags=['weak-episode', 'weak-duplicate'],
formatter={'season': int, 'episode': int, 'version': int},
conflict_solver=lambda match, other: match if other.name == 'year' else '__default__',
disabled=lambda context: context.get('episode_prefer_number', False)) \
name='weak_duplicate',
conflict_solver=season_episode_conflict_solver,
disabled=lambda context: (context.get('episode_prefer_number', False) or
context.get('type') == 'movie') or is_season_episode_disabled(context)) \
.defaults(validator=None) \
.regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})').repeater('*')
rebulk.regex(r'v(?P<version>\d+)', children=True, private_parent=True, formatter=int)
rebulk.regex(r'v(?P<version>\d+)', children=True, private_parent=True, formatter=int,
disabled=lambda context: is_disabled(context, 'version'))
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])
# TODO: List of words
# detached of X count (season/episode)
rebulk.regex(r'(?P<episode>\d+)?-?' + build_or_pattern(of_words) +
rebulk.regex(r'(?P<episode>\d+)-?' + build_or_pattern(of_words) +
r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
abbreviations=[dash], children=True, private_parent=True, formatter=int)
abbreviations=[dash], children=True, private_parent=True, formatter=int,
disabled=lambda context: is_disabled(context, 'episode'))
rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode")
rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode",
disabled=lambda context: is_disabled(context, 'episode_format'))
# Hardcoded movie to disable weak season/episodes
rebulk.regex('OSS-?117',
abbreviations=[dash], name="hardcoded-movies", marker=True,
conflict_solver=lambda match, other: None)
rebulk.rules(EpisodeNumberSeparatorRange(range_separators),
rebulk.rules(WeakConflictSolver, RemoveInvalidSeason, RemoveInvalidEpisode,
SeePatternRange(range_separators + ['_']),
EpisodeNumberSeparatorRange(range_separators),
SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx,
RemoveWeakDuplicate, EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator,
CountValidator, EpisodeSingleDigitValidator)
RemoveWeak, RenameToAbsoluteEpisode, CountValidator, EpisodeSingleDigitValidator, RenameToDiscMatch)
return rebulk
class WeakConflictSolver(Rule):
"""
Rule to decide whether weak-episode or weak-duplicate matches should be kept.
If an anime is detected:
- weak-duplicate matches should be removed
- weak-episode matches should be tagged as anime
Otherwise:
- weak-episode matches are removed unless they're part of an episode range match.
"""
priority = 128
consequence = [RemoveMatch, AppendMatch]
def enabled(self, context):
return context.get('type') != 'movie'
@classmethod
def is_anime(cls, matches):
"""Return True if it seems to be an anime.
Anime characteristics:
- version, crc32 matches
- screen_size inside brackets
- release_group at start and inside brackets
"""
if matches.named('version') or matches.named('crc32'):
return True
for group in matches.markers.named('group'):
if matches.range(group.start, group.end, predicate=lambda m: m.name == 'screen_size'):
return True
if matches.markers.starting(group.start, predicate=lambda m: m.name == 'path'):
hole = matches.holes(group.start, group.end, index=0)
if hole and hole.raw == group.raw:
return True
return False
def when(self, matches, context):
to_remove = []
to_append = []
anime_detected = self.is_anime(matches)
for filepart in matches.markers.named('path'):
weak_matches = matches.range(filepart.start, filepart.end, predicate=(
lambda m: m.initiator.name == 'weak_episode'))
weak_dup_matches = matches.range(filepart.start, filepart.end, predicate=(
lambda m: m.initiator.name == 'weak_duplicate'))
if anime_detected:
if weak_matches:
to_remove.extend(weak_dup_matches)
for match in matches.range(filepart.start, filepart.end, predicate=(
lambda m: m.name == 'episode' and m.initiator.name != 'weak_duplicate')):
episode = copy.copy(match)
episode.tags = episode.tags + ['anime']
to_append.append(episode)
to_remove.append(match)
elif weak_dup_matches:
episodes_in_range = matches.range(filepart.start, filepart.end, predicate=(
lambda m:
m.name == 'episode' and m.initiator.name == 'weak_episode'
and m.initiator.children.named('episodeSeparator')
))
if not episodes_in_range and not matches.range(filepart.start, filepart.end,
predicate=lambda m: 'SxxExx' in m.tags):
to_remove.extend(weak_matches)
else:
for match in episodes_in_range:
episode = copy.copy(match)
episode.tags = []
to_append.append(episode)
to_remove.append(match)
if to_append:
to_remove.extend(weak_dup_matches)
return to_remove, to_append
class CountValidator(Rule):
"""
Validate count property and rename it
@ -317,6 +445,41 @@ class CountValidator(Rule):
return to_remove, episode_count, season_count
class SeePatternRange(Rule):
"""
Create matches for episode range for SEE pattern. E.g.: Cap.102_104
"""
priority = 128
consequence = [RemoveMatch, AppendMatch]
def __init__(self, range_separators):
super(SeePatternRange, self).__init__()
self.range_separators = range_separators
def when(self, matches, context):
to_remove = []
to_append = []
for separator in matches.tagged('see-pattern', lambda m: m.name == 'episodeSeparator'):
previous_match = matches.previous(separator, lambda m: m.name == 'episode' and 'see-pattern' in m.tags, 0)
next_match = matches.next(separator, lambda m: m.name == 'season' and 'see-pattern' in m.tags, 0)
if not next_match:
continue
next_match = matches.next(next_match, lambda m: m.name == 'episode' and 'see-pattern' in m.tags, 0)
if previous_match and next_match and separator.value in self.range_separators:
to_remove.append(next_match)
for episode_number in range(previous_match.value + 1, next_match.value + 1):
match = copy.copy(next_match)
match.value = episode_number
to_append.append(match)
to_remove.append(separator)
return to_remove, to_append
class AbstractSeparatorRange(Rule):
"""
Remove separator matches and create matches for season range.
@ -334,14 +497,18 @@ class AbstractSeparatorRange(Rule):
to_append = []
for separator in matches.named(self.property_name + 'Separator'):
previous_match = matches.previous(separator, lambda match: match.name == self.property_name, 0)
next_match = matches.next(separator, lambda match: match.name == self.property_name, 0)
previous_match = matches.previous(separator, lambda m: m.name == self.property_name, 0)
next_match = matches.next(separator, lambda m: m.name == self.property_name, 0)
initiator = separator.initiator
if previous_match and next_match and separator.value in self.range_separators:
to_remove.append(next_match)
for episode_number in range(previous_match.value + 1, next_match.value):
match = copy.copy(next_match)
match.value = episode_number
initiator.children.append(match)
to_append.append(match)
to_append.append(next_match)
to_remove.append(separator)
previous_match = None
@ -351,9 +518,11 @@ class AbstractSeparatorRange(Rule):
if separator not in self.range_separators:
separator = strip(separator)
if separator in self.range_separators:
initiator = previous_match.initiator
for episode_number in range(previous_match.value + 1, next_match.value):
match = copy.copy(next_match)
match.value = episode_number
initiator.children.append(match)
to_append.append(match)
to_append.append(Match(previous_match.end, next_match.start - 1,
name=self.property_name + 'Separator',
@ -367,12 +536,46 @@ class AbstractSeparatorRange(Rule):
return to_remove, to_append
class RenameToAbsoluteEpisode(Rule):
"""
Rename episode matches to absolute_episode.
Absolute episodes are only used if two groups of episodes are detected:
S02E04-06 25-27
25-27 S02E04-06
2x04-06 25-27
28. Anime Name S02E05
The matches in the group with higher episode values are renamed to absolute_episode.
"""
consequence = RenameMatch('absolute_episode')
def when(self, matches, context): # pylint:disable=inconsistent-return-statements
initiators = {match.initiator for match in matches.named('episode')
if len(match.initiator.children.named('episode')) > 1}
if len(initiators) != 2:
ret = []
for filepart in matches.markers.named('path'):
if matches.range(filepart.start + 1, filepart.end, predicate=lambda m: m.name == 'episode'):
ret.extend(
matches.starting(filepart.start, predicate=lambda m: m.initiator.name == 'weak_episode'))
return ret
initiators = sorted(initiators, key=lambda item: item.end)
if not matches.holes(initiators[0].end, initiators[1].start, predicate=lambda m: m.raw.strip(seps)):
first_range = matches.named('episode', predicate=lambda m: m.initiator == initiators[0])
second_range = matches.named('episode', predicate=lambda m: m.initiator == initiators[1])
if len(first_range) == len(second_range):
if second_range[0].value > first_range[0].value:
return second_range
if first_range[0].value > second_range[0].value:
return first_range
class EpisodeNumberSeparatorRange(AbstractSeparatorRange):
"""
Remove separator matches and create matches for episode number range.
"""
priority = 128
consequence = [RemoveMatch, AppendMatch]
def __init__(self, range_separators):
super(EpisodeNumberSeparatorRange, self).__init__(range_separators, "episode")
@ -382,8 +585,6 @@ class SeasonSeparatorRange(AbstractSeparatorRange):
"""
Remove separator matches and create matches for season range.
"""
priority = 128
consequence = [RemoveMatch, AppendMatch]
def __init__(self, range_separators):
super(SeasonSeparatorRange, self).__init__(range_separators, "season")
@ -391,26 +592,142 @@ class SeasonSeparatorRange(AbstractSeparatorRange):
class RemoveWeakIfMovie(Rule):
"""
Remove weak-movie tagged matches if it seems to be a movie.
Remove weak-episode tagged matches if it seems to be a movie.
"""
priority = 64
consequence = RemoveMatch
def enabled(self, context):
return context.get('type') != 'episode'
def when(self, matches, context):
if matches.named('year') or matches.markers.named('hardcoded-movies'):
return matches.tagged('weak-movie')
to_remove = []
to_ignore = set()
remove = False
for filepart in matches.markers.named('path'):
year = matches.range(filepart.start, filepart.end, predicate=lambda m: m.name == 'year', index=0)
if year:
remove = True
next_match = matches.range(year.end, filepart.end, predicate=lambda m: m.private, index=0)
if (next_match and not matches.holes(year.end, next_match.start, predicate=lambda m: m.raw.strip(seps))
and not matches.at_match(next_match, predicate=lambda m: m.name == 'year')):
to_ignore.add(next_match.initiator)
to_ignore.update(matches.range(filepart.start, filepart.end,
predicate=lambda m: len(m.children.named('episode')) > 1))
to_remove.extend(matches.conflicting(year))
if remove:
to_remove.extend(matches.tagged('weak-episode', predicate=(
lambda m: m.initiator not in to_ignore and 'anime' not in m.tags)))
return to_remove
class RemoveWeak(Rule):
"""
Remove weak-episode matches which appear after video, source, and audio matches.
"""
priority = 16
consequence = RemoveMatch
def when(self, matches, context):
to_remove = []
for filepart in matches.markers.named('path'):
weaks = matches.range(filepart.start, filepart.end, predicate=lambda m: 'weak-episode' in m.tags)
if weaks:
previous = matches.previous(weaks[0], predicate=lambda m: m.name in (
'audio_codec', 'screen_size', 'streaming_service', 'source', 'video_profile',
'audio_channels', 'audio_profile'), index=0)
if previous and not matches.holes(
previous.end, weaks[0].start, predicate=lambda m: m.raw.strip(seps)):
to_remove.extend(weaks)
return to_remove
class RemoveWeakIfSxxExx(Rule):
"""
Remove weak-movie tagged matches if SxxExx pattern is matched.
Remove weak-episode tagged matches if SxxExx pattern is matched.
Weak episodes at beginning of filepart are kept.
"""
priority = 64
consequence = RemoveMatch
def when(self, matches, context):
if matches.tagged('SxxExx', lambda match: not match.private):
return matches.tagged('weak-movie')
to_remove = []
for filepart in matches.markers.named('path'):
if matches.range(filepart.start, filepart.end,
predicate=lambda m: not m.private and 'SxxExx' in m.tags):
for match in matches.range(filepart.start, filepart.end, predicate=lambda m: 'weak-episode' in m.tags):
if match.start != filepart.start or match.initiator.name != 'weak_episode':
to_remove.append(match)
return to_remove
class RemoveInvalidSeason(Rule):
"""
Remove invalid season matches.
"""
priority = 64
consequence = RemoveMatch
def when(self, matches, context):
to_remove = []
for filepart in matches.markers.named('path'):
strong_season = matches.range(filepart.start, filepart.end, index=0,
predicate=lambda m: m.name == 'season'
and not m.private and 'SxxExx' in m.tags)
if strong_season:
if strong_season.initiator.children.named('episode'):
for season in matches.range(strong_season.end, filepart.end,
predicate=lambda m: m.name == 'season' and not m.private):
# remove weak season or seasons without episode matches
if 'SxxExx' not in season.tags or not season.initiator.children.named('episode'):
if season.initiator:
to_remove.append(season.initiator)
to_remove.extend(season.initiator.children)
else:
to_remove.append(season)
return to_remove
class RemoveInvalidEpisode(Rule):
"""
Remove invalid episode matches.
"""
priority = 64
consequence = RemoveMatch
def when(self, matches, context):
to_remove = []
for filepart in matches.markers.named('path'):
strong_episode = matches.range(filepart.start, filepart.end, index=0,
predicate=lambda m: m.name == 'episode'
and not m.private and 'SxxExx' in m.tags)
if strong_episode:
strong_ep_marker = RemoveInvalidEpisode.get_episode_prefix(matches, strong_episode)
for episode in matches.range(strong_episode.end, filepart.end,
predicate=lambda m: m.name == 'episode' and not m.private):
ep_marker = RemoveInvalidEpisode.get_episode_prefix(matches, episode)
if strong_ep_marker and ep_marker and strong_ep_marker.value.lower() != ep_marker.value.lower():
if episode.initiator:
to_remove.append(episode.initiator)
to_remove.extend(episode.initiator.children)
else:
to_remove.append(ep_marker)
to_remove.append(episode)
return to_remove
@staticmethod
def get_episode_prefix(matches, episode):
"""
Return episode prefix: episodeMarker or episodeSeparator
"""
return matches.previous(episode, index=0,
predicate=lambda m: m.name in ('episodeMarker', 'episodeSeparator'))
class RemoveWeakDuplicate(Rule):
@ -425,7 +742,7 @@ class RemoveWeakDuplicate(Rule):
for filepart in matches.markers.named('path'):
patterns = defaultdict(list)
for match in reversed(matches.range(filepart.start, filepart.end,
predicate=lambda match: 'weak-duplicate' in match.tags)):
predicate=lambda m: 'weak-duplicate' in m.tags)):
if match.pattern in patterns[match.name]:
to_remove.append(match)
else:
@ -465,15 +782,15 @@ class RemoveDetachedEpisodeNumber(Rule):
episode_numbers = []
episode_values = set()
for match in matches.named('episode', lambda match: not match.private and 'weak-movie' in match.tags):
for match in matches.named('episode', lambda m: not m.private and 'weak-episode' in m.tags):
if match.value not in episode_values:
episode_numbers.append(match)
episode_values.add(match.value)
episode_numbers = list(sorted(episode_numbers, key=lambda match: match.value))
episode_numbers = list(sorted(episode_numbers, key=lambda m: m.value))
if len(episode_numbers) > 1 and \
episode_numbers[0].value < 10 and \
episode_numbers[1].value - episode_numbers[0].value != 1:
episode_numbers[0].value < 10 and \
episode_numbers[1].value - episode_numbers[0].value != 1:
parent = episode_numbers[0]
while parent: # TODO: Add a feature in rebulk to avoid this ...
ret.append(parent)
@ -514,3 +831,29 @@ class EpisodeSingleDigitValidator(Rule):
if not matches.range(*group.span, predicate=lambda match: match.name == 'title'):
ret.append(episode)
return ret
class RenameToDiscMatch(Rule):
"""
Rename episodes detected with `d` episodeMarkers to `disc`.
"""
consequence = [RenameMatch('disc'), RenameMatch('discMarker'), RemoveMatch]
def when(self, matches, context):
discs = []
markers = []
to_remove = []
disc_disabled = is_disabled(context, 'disc')
for marker in matches.named('episodeMarker', predicate=lambda m: m.value.lower() == 'd'):
if disc_disabled:
to_remove.append(marker)
to_remove.extend(marker.initiator.children)
continue
markers.append(marker)
discs.extend(sorted(marker.initiator.children.named('episode'), key=lambda m: m.value))
return discs, markers, to_remove
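Note: two behaviours from this file are worth illustrating, separator-range expansion and the new RenameToDiscMatch rule. A sketch; filenames are illustrative and expected values inferred from the rules above:
from guessit import guessit
# Range separators are expanded into an explicit episode list.
print(guessit('Show.S01E01-03.mkv').get('episode'))   # [1, 2, 3] expected
# A 'D' episode marker is now split off and renamed to disc.
print(guessit('Show.S01D02.BluRay.mkv').get('disc'))  # 2 expected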

View file

@ -3,21 +3,24 @@
"""
film property
"""
from rebulk import Rebulk, AppendMatch, Rule
from rebulk.remodule import re
from rebulk import Rebulk, AppendMatch, Rule
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import seps_surround
def film():
def film(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE)
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, validate_all=True, validator={'__parent__': seps_surround})
rebulk.regex(r'f(\d{1,2})', name='film', private_parent=True, children=True, formatter=int)
rebulk.regex(r'f(\d{1,2})', name='film', private_parent=True, children=True, formatter=int,
disabled=lambda context: is_disabled(context, 'film'))
rebulk.rules(FilmTitleRule)
@ -32,7 +35,10 @@ class FilmTitleRule(Rule):
properties = {'film_title': [None]}
def when(self, matches, context):
def enabled(self, context):
return not is_disabled(context, 'film_title')
def when(self, matches, context): # pylint:disable=inconsistent-return-statements
bonus_number = matches.named('film', lambda match: not match.private, index=0)
if bonus_number:
filepath = matches.markers.at_match(bonus_number, lambda marker: marker.name == 'path', 0)
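Note: an illustrative run of the film pattern and FilmTitleRule. A sketch only; the filename and outputs are assumptions based on the patterns above:
from guessit import guessit
info = guessit('Some Saga f2 Return of the Saga DVDRip.avi')
# film 2 expected; film_title should come from the text before the marker.
print(info.get('film'), info.get('film_title'), info.get('title'))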

View file

@ -1,67 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
format property
"""
from rebulk.remodule import re
from rebulk import Rebulk, RemoveMatch, Rule
from ..common import dash
from ..common.validators import seps_before, seps_after
def format_():
"""
Builder for rebulk object.
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
rebulk.defaults(name="format")
rebulk.regex("VHS", "VHS-?Rip", value="VHS")
rebulk.regex("CAM", "CAM-?Rip", "HD-?CAM", value="Cam")
rebulk.regex("TELESYNC", "TS", "HD-?TS", value="Telesync")
rebulk.regex("WORKPRINT", "WP", value="Workprint")
rebulk.regex("TELECINE", "TC", value="Telecine")
rebulk.regex("PPV", "PPV-?Rip", value="PPV") # Pay Per View
rebulk.regex("SD-?TV", "SD-?TV-?Rip", "Rip-?SD-?TV", "TV-?Rip",
"Rip-?TV", value="TV") # TV is too common to allow matching
rebulk.regex("DVB-?Rip", "DVB", "PD-?TV", value="DVB")
rebulk.regex("DVD", "DVD-?Rip", "VIDEO-?TS", "DVD-?R(?:$|(?!E))", # "DVD-?R(?:$|^E)" => DVD-Real ...
"DVD-?9", "DVD-?5", value="DVD")
rebulk.regex("HD-?TV", "TV-?RIP-?HD", "HD-?TV-?RIP", "HD-?RIP", value="HDTV")
rebulk.regex("VOD", "VOD-?Rip", value="VOD")
rebulk.regex("WEB-?Rip", value="WEBRip")
rebulk.regex("WEB-?DL", "WEB-?HD", "WEB", value="WEB-DL")
rebulk.regex("HD-?DVD-?Rip", "HD-?DVD", value="HD-DVD")
rebulk.regex("Blu-?ray(?:-?Rip)?", "B[DR]", "B[DR]-?Rip", "BD[59]", "BD25", "BD50", value="BluRay")
rebulk.rules(ValidateFormat)
return rebulk
class ValidateFormat(Rule):
"""
Validate format with screener property, with video_codec property or separated
"""
priority = 64
consequence = RemoveMatch
def when(self, matches, context):
ret = []
for format_match in matches.named('format'):
if not seps_before(format_match) and \
not matches.range(format_match.start - 1, format_match.start - 2,
lambda match: match.name == 'other' and match.value == 'Screener'):
ret.append(format_match)
continue
if not seps_after(format_match) and \
not matches.range(format_match.end, format_match.end + 1,
lambda match: match.name == 'video_codec' or (
match.name == 'other' and match.value == 'Screener')):
ret.append(format_match)
continue
return ret
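Note that this whole file is deleted by the commit: the format property is superseded by the new source property added below, so the same tokens now map to new values. A hedged before/after sketch:

from guessit import guessit

# guessit 2.x reported {'format': 'WEB-DL'} for names like this; with the
# new source property the expected result is:
print(guessit('Movie.2018.720p.WEB-DL.x264.mkv'))
# expected to include {'source': 'Web', ...}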

View file

@ -5,59 +5,86 @@ language and subtitle_language properties
"""
# pylint: disable=no-member
import copy
from collections import defaultdict, namedtuple
import babelfish
from rebulk.remodule import re
from rebulk import Rebulk, Rule, RemoveMatch, RenameMatch
from ..common.words import iter_words, COMMON_WORDS
from ..common import seps
from ..common.pattern import is_disabled
from ..common.words import iter_words
from ..common.validators import seps_surround
def language():
def language(config, common_words):
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:param common_words: common words
:type common_words: set
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk()
subtitle_both = config['subtitle_affixes']
subtitle_prefixes = sorted(subtitle_both + config['subtitle_prefixes'], key=length_comparator)
subtitle_suffixes = sorted(subtitle_both + config['subtitle_suffixes'], key=length_comparator)
lang_both = config['language_affixes']
lang_prefixes = sorted(lang_both + config['language_prefixes'], key=length_comparator)
lang_suffixes = sorted(lang_both + config['language_suffixes'], key=length_comparator)
weak_affixes = frozenset(config['weak_affixes'])
rebulk = Rebulk(disabled=lambda context: (is_disabled(context, 'language') and
is_disabled(context, 'subtitle_language')))
    rebulk.string(*subtitle_prefixes, name="subtitle_language.prefix", ignore_case=True, private=True,
                  validator=seps_surround)
    rebulk.string(*subtitle_suffixes, name="subtitle_language.suffix", ignore_case=True, private=True,
                  validator=seps_surround)
    rebulk.functional(find_languages, properties={'language': [None]})
    rebulk.rules(SubtitlePrefixLanguageRule, SubtitleSuffixLanguageRule, SubtitleExtensionRule)
    rebulk.string(*subtitle_prefixes, name="subtitle_language.prefix", ignore_case=True, private=True,
                  validator=seps_surround, tags=['release-group-prefix'],
                  disabled=lambda context: is_disabled(context, 'subtitle_language'))
    rebulk.string(*subtitle_suffixes, name="subtitle_language.suffix", ignore_case=True, private=True,
                  validator=seps_surround,
                  disabled=lambda context: is_disabled(context, 'subtitle_language'))
    rebulk.string(*lang_suffixes, name="language.suffix", ignore_case=True, private=True,
                  validator=seps_surround, tags=['source-suffix'],
                  disabled=lambda context: is_disabled(context, 'language'))
    def find_languages(string, context=None):
        """Find languages in the string
        :return: list of tuple (property, Language, lang_word, word)
        """
        return LanguageFinder(context, subtitle_prefixes, subtitle_suffixes,
                              lang_prefixes, lang_suffixes, weak_affixes).find(string)
    rebulk.functional(find_languages,
                      properties={'language': [None]},
                      disabled=lambda context: not context.get('allowed_languages'))
    rebulk.rules(SubtitleExtensionRule,
                 SubtitlePrefixLanguageRule,
                 SubtitleSuffixLanguageRule,
                 RemoveLanguage,
                 RemoveInvalidLanguages(common_words))
    babelfish.language_converters['guessit'] = GuessitConverter(config['synonyms'])
    return rebulk
COMMON_WORDS_STRICT = frozenset(['brazil'])
UNDETERMINED = babelfish.Language('und')
SYN = {('und', None): ['unknown', 'inconnu', 'unk', 'un'],
('ell', None): ['gr', 'greek'],
('spa', None): ['esp', 'español'],
('fra', None): ['français', 'vf', 'vff', 'vfi', 'vfq'],
('swe', None): ['se'],
('por', 'BR'): ['po', 'pb', 'pob', 'br', 'brazilian'],
('cat', None): ['català'],
('ces', None): ['cz'],
('ukr', None): ['ua'],
('zho', None): ['cn'],
('jpn', None): ['jp'],
('hrv', None): ['scr'],
('mul', None): ['multi', 'dl']} # http://scenelingo.wordpress.com/2009/03/24/what-does-dl-mean/
class GuessitConverter(babelfish.LanguageReverseConverter): # pylint: disable=missing-docstring
_with_country_regexp = re.compile(r'(.*)\((.*)\)')
_with_country_regexp2 = re.compile(r'(.*)-(.*)')
def __init__(self):
def __init__(self, synonyms):
self.guessit_exceptions = {}
for (alpha3, country), synlist in SYN.items():
for code, synlist in synonyms.items():
if '_' in code:
(alpha3, country) = code.split('_')
else:
(alpha3, country) = (code, None)
for syn in synlist:
self.guessit_exceptions[syn.lower()] = (alpha3, country, None)
@ -73,16 +100,8 @@ class GuessitConverter(babelfish.LanguageReverseConverter): # pylint: disable=m
def convert(self, alpha3, country=None, script=None):
return str(babelfish.Language(alpha3, country, script))
def reverse(self, name):
with_country = (GuessitConverter._with_country_regexp.match(name) or
GuessitConverter._with_country_regexp2.match(name))
def reverse(self, name): # pylint:disable=arguments-differ
name = name.lower()
if with_country:
lang = babelfish.Language.fromguessit(with_country.group(1).strip())
lang.country = babelfish.Country.fromguessit(with_country.group(2).strip())
return lang.alpha3, lang.country.alpha2 if lang.country else None, lang.script or None
# exceptions come first, as they need to override a potential match
# with any of the other guessers
try:
@ -94,7 +113,8 @@ class GuessitConverter(babelfish.LanguageReverseConverter): # pylint: disable=m
babelfish.Language.fromalpha3b,
babelfish.Language.fromalpha2,
babelfish.Language.fromname,
babelfish.Language.fromopensubtitles]:
babelfish.Language.fromopensubtitles,
babelfish.Language.fromietf]:
try:
reverse = conv(name)
return reverse.alpha3, reverse.country, reverse.script
@ -104,59 +124,237 @@ class GuessitConverter(babelfish.LanguageReverseConverter): # pylint: disable=m
raise babelfish.LanguageReverseError(name)
babelfish.language_converters['guessit'] = GuessitConverter()
subtitle_both = ['sub', 'subs', 'subbed', 'custom subbed', 'custom subs', 'custom sub', 'customsubbed', 'customsubs',
'customsub']
subtitle_prefixes = subtitle_both + ['st', 'vost', 'subforced', 'fansub', 'hardsub']
subtitle_suffixes = subtitle_both + ['subforced', 'fansub', 'hardsub']
lang_prefixes = ['true']
all_lang_prefixes_suffixes = subtitle_prefixes + subtitle_suffixes + lang_prefixes
def find_languages(string, context=None):
    """Find languages in the string
    :return: list of tuple (property, Language, lang_word, word)
    """
    allowed_languages = context.get('allowed_languages')
    common_words = COMMON_WORDS_STRICT if allowed_languages else COMMON_WORDS
    matches = []
    for word_match in iter_words(string):
        word = word_match.value
        start, end = word_match.span
        lang_word = word.lower()
        key = 'language'
        for prefix in subtitle_prefixes:
            if lang_word.startswith(prefix):
                lang_word = lang_word[len(prefix):]
                key = 'subtitle_language'
        for suffix in subtitle_suffixes:
            if lang_word.endswith(suffix):
                lang_word = lang_word[:len(lang_word) - len(suffix)]
                key = 'subtitle_language'
        for prefix in lang_prefixes:
            if lang_word.startswith(prefix):
                lang_word = lang_word[len(prefix):]
        if lang_word not in common_words and word.lower() not in common_words:
            try:
                lang = babelfish.Language.fromguessit(lang_word)
                match = (start, end, {'name': key, 'value': lang})
                if allowed_languages:
                    if lang.name.lower() in allowed_languages \
                            or lang.alpha2.lower() in allowed_languages \
                            or lang.alpha3.lower() in allowed_languages:
                        matches.append(match)
                # Keep language with alpha2 equivalent. Others are probably
                # uncommon languages.
                elif lang == 'mul' or hasattr(lang, 'alpha2'):
                    matches.append(match)
            except babelfish.Error:
                pass
    return matches
def length_comparator(value):
    """
    Return value length.
    """
    return len(value)
_LanguageMatch = namedtuple('_LanguageMatch', ['property_name', 'word', 'lang'])
class LanguageWord(object):
"""
Extension to the Word namedtuple in order to create compound words.
E.g.: pt-BR, soft subtitles, custom subs
"""
def __init__(self, start, end, value, input_string, next_word=None):
self.start = start
self.end = end
self.value = value
self.input_string = input_string
self.next_word = next_word
@property
def extended_word(self): # pylint:disable=inconsistent-return-statements
"""
Return the extended word for this instance, if any.
"""
if self.next_word:
separator = self.input_string[self.end:self.next_word.start]
next_separator = self.input_string[self.next_word.end:self.next_word.end + 1]
if (separator == '-' and separator != next_separator) or separator in (' ', '.'):
value = self.input_string[self.start:self.next_word.end].replace('.', ' ')
return LanguageWord(self.start, self.next_word.end, value, self.input_string, self.next_word.next_word)
def __repr__(self):
return '<({start},{end}): {value}'.format(start=self.start, end=self.end, value=self.value)
def to_rebulk_match(language_match):
"""
Convert language match to rebulk Match: start, end, dict
"""
word = language_match.word
start = word.start
end = word.end
name = language_match.property_name
if language_match.lang == UNDETERMINED:
return start, end, {
'name': name,
'value': word.value.lower(),
'formatter': babelfish.Language,
'tags': ['weak-language']
}
return start, end, {
'name': name,
'value': language_match.lang
}
class LanguageFinder(object):
"""
Helper class to search and return language matches: 'language' and 'subtitle_language' properties
"""
def __init__(self, context,
subtitle_prefixes, subtitle_suffixes,
lang_prefixes, lang_suffixes, weak_affixes):
allowed_languages = context.get('allowed_languages') if context else None
self.allowed_languages = {l.lower() for l in allowed_languages or []}
self.weak_affixes = weak_affixes
self.prefixes_map = {}
self.suffixes_map = {}
if not is_disabled(context, 'subtitle_language'):
self.prefixes_map['subtitle_language'] = subtitle_prefixes
self.suffixes_map['subtitle_language'] = subtitle_suffixes
self.prefixes_map['language'] = lang_prefixes
self.suffixes_map['language'] = lang_suffixes
def find(self, string):
"""
Return all matches for language and subtitle_language.
Undetermined language matches are removed if a regular language is found.
Multi language matches are removed if there are only undetermined language matches
"""
regular_lang_map = defaultdict(set)
undetermined_map = defaultdict(set)
multi_map = defaultdict(set)
for match in self.iter_language_matches(string):
key = match.property_name
if match.lang == UNDETERMINED:
undetermined_map[key].add(match)
elif match.lang == 'mul':
multi_map[key].add(match)
else:
regular_lang_map[key].add(match)
for key, values in multi_map.items():
if key in regular_lang_map or key not in undetermined_map:
for value in values:
yield to_rebulk_match(value)
for key, values in undetermined_map.items():
if key not in regular_lang_map:
for value in values:
yield to_rebulk_match(value)
for values in regular_lang_map.values():
for value in values:
yield to_rebulk_match(value)
def iter_language_matches(self, string):
"""
Return language matches for the given string.
"""
candidates = []
previous = None
for word in iter_words(string):
language_word = LanguageWord(start=word.span[0], end=word.span[1], value=word.value, input_string=string)
if previous:
previous.next_word = language_word
candidates.append(previous)
previous = language_word
if previous:
candidates.append(previous)
for candidate in candidates:
for match in self.iter_matches_for_candidate(candidate):
yield match
def iter_matches_for_candidate(self, language_word):
"""
Return language matches for the given candidate word.
"""
tuples = [
(language_word, language_word.next_word,
self.prefixes_map,
lambda string, prefix: string.startswith(prefix),
lambda string, prefix: string[len(prefix):]),
(language_word.next_word, language_word,
self.suffixes_map,
lambda string, suffix: string.endswith(suffix),
lambda string, suffix: string[:len(string) - len(suffix)])
]
for word, fallback_word, affixes, is_affix, strip_affix in tuples:
if not word:
continue
match = self.find_match_for_word(word, fallback_word, affixes, is_affix, strip_affix)
if match:
yield match
match = self.find_language_match_for_word(language_word)
if match:
yield match
def find_match_for_word(self, word, fallback_word, affixes, is_affix, strip_affix): # pylint:disable=inconsistent-return-statements
"""
Return the language match for the given word and affixes.
"""
for current_word in (word.extended_word, word):
if not current_word:
continue
word_lang = current_word.value.lower()
for key, parts in affixes.items():
for part in parts:
if not is_affix(word_lang, part):
continue
match = None
value = strip_affix(word_lang, part)
if not value:
if fallback_word and (
abs(fallback_word.start - word.end) <= 1 or abs(word.start - fallback_word.end) <= 1):
match = self.find_language_match_for_word(fallback_word, key=key)
if not match and part not in self.weak_affixes:
match = self.create_language_match(key, LanguageWord(current_word.start, current_word.end,
'und', current_word.input_string))
else:
match = self.create_language_match(key, LanguageWord(current_word.start, current_word.end,
value, current_word.input_string))
if match:
return match
def find_language_match_for_word(self, word, key='language'): # pylint:disable=inconsistent-return-statements
"""
Return the language match for the given word.
"""
for current_word in (word.extended_word, word):
if current_word:
match = self.create_language_match(key, current_word)
if match:
return match
def create_language_match(self, key, word): # pylint:disable=inconsistent-return-statements
"""
Create a LanguageMatch for a given word
"""
lang = self.parse_language(word.value.lower())
if lang is not None:
return _LanguageMatch(property_name=key, word=word, lang=lang)
def parse_language(self, lang_word): # pylint:disable=inconsistent-return-statements
"""
Parse the lang_word into a valid Language.
Multi and Undetermined languages are also valid languages.
"""
try:
lang = babelfish.Language.fromguessit(lang_word)
if ((hasattr(lang, 'name') and lang.name.lower() in self.allowed_languages) or
(hasattr(lang, 'alpha2') and lang.alpha2.lower() in self.allowed_languages) or
lang.alpha3.lower() in self.allowed_languages):
return lang
except babelfish.Error:
pass
class SubtitlePrefixLanguageRule(Rule):
@ -167,6 +365,9 @@ class SubtitlePrefixLanguageRule(Rule):
properties = {'subtitle_language': [None]}
def enabled(self, context):
return not is_disabled(context, 'subtitle_language')
def when(self, matches, context):
to_rename = []
to_remove = matches.named('subtitle_language.prefix')
@ -184,6 +385,7 @@ class SubtitlePrefixLanguageRule(Rule):
lambda match: match.name == 'subtitle_language.prefix', 0)
if prefix:
to_rename.append((prefix, lang))
to_remove.extend(matches.conflicting(lang))
if prefix in to_remove:
to_remove.remove(prefix)
return to_rename, to_remove
@ -211,6 +413,9 @@ class SubtitleSuffixLanguageRule(Rule):
properties = {'subtitle_language': [None]}
def enabled(self, context):
return not is_disabled(context, 'subtitle_language')
def when(self, matches, context):
to_append = []
to_remove = matches.named('subtitle_language.suffix')
@ -233,17 +438,66 @@ class SubtitleSuffixLanguageRule(Rule):
class SubtitleExtensionRule(Rule):
"""
    Convert a language guess to subtitle_language if the next match is a subtitle extension.
Since it's a strong match, it also removes any conflicting source with it.
"""
consequence = RenameMatch('subtitle_language')
consequence = [RemoveMatch, RenameMatch('subtitle_language')]
properties = {'subtitle_language': [None]}
def when(self, matches, context):
def enabled(self, context):
return not is_disabled(context, 'subtitle_language')
def when(self, matches, context): # pylint:disable=inconsistent-return-statements
subtitle_extension = matches.named('container',
lambda match: 'extension' in match.tags and 'subtitle' in match.tags,
0)
if subtitle_extension:
subtitle_lang = matches.previous(subtitle_extension, lambda match: match.name == 'language', 0)
if subtitle_lang:
return subtitle_lang
for weak in matches.named('subtitle_language', predicate=lambda m: 'weak-language' in m.tags):
weak.private = True
return matches.conflicting(subtitle_lang, lambda m: m.name == 'source'), subtitle_lang
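A sketch of the rule above (filename invented; the exact Language repr may differ):

from guessit import guessit

# The 'en' right before the '.srt' subtitle extension is renamed from
# language to subtitle_language by SubtitleExtensionRule.
print(guessit('Movie.Name.2018.en.srt'))
# expected to include {'subtitle_language': <Language [en]>, ...}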
class RemoveLanguage(Rule):
"""Remove language matches that were not converted to subtitle_language when language is disabled."""
consequence = RemoveMatch
def enabled(self, context):
return is_disabled(context, 'language')
def when(self, matches, context):
return matches.named('language')
class RemoveInvalidLanguages(Rule):
"""Remove language matches that matches the blacklisted common words."""
consequence = RemoveMatch
def __init__(self, common_words):
"""Constructor."""
super(RemoveInvalidLanguages, self).__init__()
self.common_words = common_words
def when(self, matches, context):
to_remove = []
for match in matches.range(0, len(matches.input_string),
predicate=lambda m: m.name in ('language', 'subtitle_language')):
if match.raw.lower() not in self.common_words:
continue
group = matches.markers.at_match(match, index=0, predicate=lambda m: m.name == 'group')
if group and (
not matches.range(
group.start, group.end, predicate=lambda m: m.name not in ('language', 'subtitle_language')
) and (not matches.holes(group.start, group.end, predicate=lambda m: m.value.strip(seps)))):
continue
to_remove.append(match)
return to_remove
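The functional matcher above is only enabled when the allowed_languages option is present in the context, whether passed by the caller or supplied by guessit's configuration defaults. A hedged sketch of overriding it (option values assumed):

from guessit import guessit

# Restrict detection to an explicit whitelist; other candidate words
# are not guessed as languages.
print(guessit('Show.S01E01.FRENCH.720p.mkv', {'allowed_languages': ['en', 'fr']}))
# expected to include {'language': <Language [fr]>, ...}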

View file

@ -8,16 +8,23 @@ import mimetypes
from rebulk import Rebulk, CustomRule, POST_PROCESS
from rebulk.match import Match
from ..common.pattern import is_disabled
from ...rules.processors import Processors
def mimetype():
def mimetype(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
return Rebulk().rules(Mimetype)
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'mimetype'))
rebulk.rules(Mimetype)
return rebulk
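A minimal sketch of the resulting property (resolution relies on Python's mimetypes database, so the value is platform-dependent):

from guessit import guessit

# The Mimetype rule derives the value from the container extension.
print(guessit('Movie.Name.2018.1080p.mkv'))
# expected to include {'mimetype': 'video/x-matroska', ...} where the
# platform's mimetypes database knows the extension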
class Mimetype(CustomRule):

View file

@ -5,35 +5,50 @@ other property
"""
import copy
from rebulk import Rebulk, Rule, RemoveMatch, RenameMatch, POST_PROCESS, AppendMatch
from rebulk.remodule import re
from rebulk import Rebulk, Rule, RemoveMatch, POST_PROCESS, AppendMatch
from ..common import dash
from ..common import seps
from ..common.validators import seps_surround, compose
from ...rules.common.formatters import raw_cleanup
from ..common.pattern import is_disabled
from ..common.validators import seps_after, seps_before, seps_surround, compose
from ...reutils import build_or_pattern
from ...rules.common.formatters import raw_cleanup
def other():
def other(config): # pylint:disable=unused-argument,too-many-statements
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'other'))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
rebulk.defaults(name="other", validator=seps_surround)
rebulk.regex('Audio-?Fix', 'Audio-?Fixed', value='AudioFix')
rebulk.regex('Sync-?Fix', 'Sync-?Fixed', value='SyncFix')
rebulk.regex('Dual-?Audio', value='DualAudio')
rebulk.regex('ws', 'wide-?screen', value='WideScreen')
rebulk.string('Netflix', 'NF', value='Netflix')
rebulk.regex('Audio-?Fix', 'Audio-?Fixed', value='Audio Fixed')
rebulk.regex('Sync-?Fix', 'Sync-?Fixed', value='Sync Fixed')
rebulk.regex('Dual', 'Dual-?Audio', value='Dual Audio')
rebulk.regex('ws', 'wide-?screen', value='Widescreen')
rebulk.regex('Re-?Enc(?:oded)?', value='Reencoded')
rebulk.string('Real', 'Fix', 'Fixed', value='Proper', tags=['has-neighbor-before', 'has-neighbor-after'])
rebulk.string('Proper', 'Repack', 'Rerip', value='Proper')
rebulk.string('Fansub', value='Fansub', tags='has-neighbor')
rebulk.string('Fastsub', value='Fastsub', tags='has-neighbor')
rebulk.string('Proper', 'Repack', 'Rerip', value='Proper',
tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.regex('Real-Proper', 'Real-Repack', 'Real-Rerip', value='Proper',
tags=['streaming_service.prefix', 'streaming_service.suffix', 'real'])
rebulk.string('Fix', 'Fixed', value='Fix', tags=['has-neighbor-before', 'has-neighbor-after',
'streaming_service.prefix', 'streaming_service.suffix'])
rebulk.string('Dirfix', 'Nfofix', 'Prooffix', value='Fix',
tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.regex('(?:Proof-?)?Sample-?Fix', value='Fix',
tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.string('Fansub', value='Fan Subtitled', tags='has-neighbor')
rebulk.string('Fastsub', value='Fast Subtitled', tags='has-neighbor')
season_words = build_or_pattern(["seasons?", "series?"])
complete_articles = build_or_pattern(["The"])
@ -58,24 +73,68 @@ def other():
value={'other': 'Complete'},
tags=['release-group-prefix'],
validator={'__parent__': compose(seps_surround, validate_complete)})
rebulk.string('R5', 'RC', value='R5')
rebulk.string('R5', value='Region 5')
rebulk.string('RC', value='Region C')
rebulk.regex('Pre-?Air', value='Preair')
rebulk.regex('(?:PS-?)?Vita', value='PS Vita')
rebulk.regex('(HD)(?P<another>Rip)', value={'other': 'HD', 'another': 'Rip'},
private_parent=True, children=True, validator={'__parent__': seps_surround}, validate_all=True)
for value in (
'Screener', 'Remux', 'Remastered', '3D', 'HD', 'mHD', 'HDLight', 'HQ', 'DDC', 'HR', 'PAL', 'SECAM', 'NTSC',
'CC', 'LD', 'MD', 'XXX'):
for value in ('Screener', 'Remux', '3D', 'PAL', 'SECAM', 'NTSC', 'XXX'):
rebulk.string(value, value=value)
for value in ('Limited', 'Complete', 'Classic', 'Unrated', 'LiNE', 'Bonus', 'Trailer', 'FINAL', 'Retail', 'Uncut',
'Extended', 'Extended Cut'):
rebulk.string('HQ', value='High Quality', tags='uhdbluray-neighbor')
rebulk.string('HR', value='High Resolution')
rebulk.string('LD', value='Line Dubbed')
rebulk.string('MD', value='Mic Dubbed')
rebulk.string('mHD', 'HDLight', value='Micro HD')
rebulk.string('LDTV', value='Low Definition')
rebulk.string('HFR', value='High Frame Rate')
rebulk.string('HD', value='HD', validator=None,
tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.regex('Full-?HD', 'FHD', value='Full HD', validator=None,
tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.regex('Ultra-?(?:HD)?', 'UHD', value='Ultra HD', validator=None,
tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.regex('Upscaled?', value='Upscaled')
for value in ('Complete', 'Classic', 'Bonus', 'Trailer', 'Retail',
'Colorized', 'Internal'):
rebulk.string(value, value=value, tags=['has-neighbor', 'release-group-prefix'])
rebulk.regex('LiNE', value='Line Audio', tags=['has-neighbor-before', 'has-neighbor-after', 'release-group-prefix'])
rebulk.regex('Read-?NFO', value='Read NFO')
rebulk.string('CONVERT', value='Converted', tags='has-neighbor')
rebulk.string('DOCU', 'DOKU', value='Documentary', tags='has-neighbor')
rebulk.string('OM', value='Open Matte', tags='has-neighbor')
rebulk.string('STV', value='Straight to Video', tags='has-neighbor')
rebulk.string('OAR', value='Original Aspect Ratio', tags='has-neighbor')
rebulk.string('Complet', value='Complete', tags=['has-neighbor', 'release-group-prefix'])
rebulk.string('VO', 'OV', value='OV', tags='has-neighbor')
for coast in ('East', 'West'):
rebulk.regex(r'(?:Live-)?(?:Episode-)?' + coast + '-?(?:Coast-)?Feed', value=coast + ' Coast Feed')
rebulk.regex('Scr(?:eener)?', value='Screener', validator=None, tags='other.validate.screener')
rebulk.string('VO', 'OV', value='Original Video', tags='has-neighbor')
rebulk.string('Ova', 'Oav', value='Original Animated Video')
rebulk.rules(ValidateHasNeighbor, ValidateHasNeighborAfter, ValidateHasNeighborBefore, ValidateScreenerRule,
ProperCountRule)
rebulk.regex('Scr(?:eener)?', value='Screener', validator=None,
tags=['other.validate.screener', 'source-prefix', 'source-suffix'])
rebulk.string('Mux', value='Mux', validator=seps_after,
tags=['other.validate.mux', 'video-codec-prefix', 'source-suffix'])
rebulk.string('HC', 'vost', value='Hardcoded Subtitles')
rebulk.string('SDR', value='Standard Dynamic Range', tags='uhdbluray-neighbor')
rebulk.regex('HDR(?:10)?', value='HDR10', tags='uhdbluray-neighbor')
rebulk.regex('Dolby-?Vision', value='Dolby Vision', tags='uhdbluray-neighbor')
rebulk.regex('BT-?2020', value='BT.2020', tags='uhdbluray-neighbor')
rebulk.string('Sample', value='Sample', tags=['at-end', 'not-a-release-group'])
rebulk.string('Proof', value='Proof', tags=['at-end', 'not-a-release-group'])
rebulk.string('Obfuscated', 'Scrambled', value='Obfuscated', tags=['at-end', 'not-a-release-group'])
rebulk.string('xpost', 'postbot', 'asrequested', value='Repost', tags='not-a-release-group')
rebulk.rules(RenameAnotherToOther, ValidateHasNeighbor, ValidateHasNeighborAfter, ValidateHasNeighborBefore,
ValidateScreenerRule, ValidateMuxRule, ValidateHardcodedSubs, ValidateStreamingServiceNeighbor,
ValidateAtEnd, ProperCountRule)
return rebulk
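Several other values are renamed by this release (DualAudio becomes Dual Audio, WideScreen becomes Widescreen, Repack/Rerip still map to Proper, and so on); a hedged sketch of one of them:

from guessit import guessit

# 'REPACK' maps to other: 'Proper' per the patterns above.
print(guessit('Movie.2018.1080p.REPACK.BluRay.x264.mkv'))
# expected to include {'other': 'Proper', 'proper_count': 1, ...}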
@ -90,7 +149,7 @@ class ProperCountRule(Rule):
properties = {'proper_count': [None]}
def when(self, matches, context):
def when(self, matches, context): # pylint:disable=inconsistent-return-statements
propers = matches.named('other', lambda match: match.value == 'Proper')
if propers:
raws = {} # Count distinct raw values
@ -98,15 +157,32 @@ class ProperCountRule(Rule):
raws[raw_cleanup(proper.raw)] = proper
proper_count_match = copy.copy(propers[-1])
proper_count_match.name = 'proper_count'
proper_count_match.value = len(raws)
value = 0
for raw in raws.values():
value += 2 if 'real' in raw.tags else 1
proper_count_match.value = value
return proper_count_match
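Per the loop above, a raw tagged 'real' now adds 2 to proper_count while any other distinct proper raw adds 1 (sketch; filename invented, result not verified):

from guessit import guessit

# 'REAL.PROPER' matches the Real-Proper pattern as a single raw tagged
# 'real', so proper_count is expected to be 2.
print(guessit('Movie.2018.REAL.PROPER.1080p.BluRay.x264.mkv'))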
class RenameAnotherToOther(Rule):
"""
Rename `another` properties to `other`
"""
priority = 32
consequence = RenameMatch('other')
def when(self, matches, context):
return matches.named('another')
class ValidateHasNeighbor(Rule):
"""
Validate tag has-neighbor
"""
consequence = RemoveMatch
priority = 64
def when(self, matches, context):
ret = []
@ -132,6 +208,7 @@ class ValidateHasNeighborBefore(Rule):
Validate tag has-neighbor-before that previous match exists.
"""
consequence = RemoveMatch
priority = 64
def when(self, matches, context):
ret = []
@ -151,6 +228,7 @@ class ValidateHasNeighborAfter(Rule):
Validate tag has-neighbor-after that next match exists.
"""
consequence = RemoveMatch
priority = 64
def when(self, matches, context):
ret = []
@ -175,7 +253,104 @@ class ValidateScreenerRule(Rule):
def when(self, matches, context):
ret = []
for screener in matches.named('other', lambda match: 'other.validate.screener' in match.tags):
format_match = matches.previous(screener, lambda match: match.name == 'format', 0)
if not format_match or matches.input_string[format_match.end:screener.start].strip(seps):
source_match = matches.previous(screener, lambda match: match.initiator.name == 'source', 0)
if not source_match or matches.input_string[source_match.end:screener.start].strip(seps):
ret.append(screener)
return ret
class ValidateMuxRule(Rule):
"""
Validate tag other.validate.mux
"""
consequence = RemoveMatch
priority = 64
def when(self, matches, context):
ret = []
for mux in matches.named('other', lambda match: 'other.validate.mux' in match.tags):
source_match = matches.previous(mux, lambda match: match.initiator.name == 'source', 0)
if not source_match:
ret.append(mux)
return ret
class ValidateHardcodedSubs(Rule):
"""Validate HC matches."""
priority = 32
consequence = RemoveMatch
def when(self, matches, context):
to_remove = []
for hc_match in matches.named('other', predicate=lambda match: match.value == 'Hardcoded Subtitles'):
next_match = matches.next(hc_match, predicate=lambda match: match.name == 'subtitle_language', index=0)
if next_match and not matches.holes(hc_match.end, next_match.start,
predicate=lambda match: match.value.strip(seps)):
continue
previous_match = matches.previous(hc_match,
predicate=lambda match: match.name == 'subtitle_language', index=0)
if previous_match and not matches.holes(previous_match.end, hc_match.start,
predicate=lambda match: match.value.strip(seps)):
continue
to_remove.append(hc_match)
return to_remove
class ValidateStreamingServiceNeighbor(Rule):
"""Validate streaming service's neighbors."""
priority = 32
consequence = RemoveMatch
def when(self, matches, context):
to_remove = []
for match in matches.named('other',
predicate=lambda m: (m.initiator.name != 'source'
and ('streaming_service.prefix' in m.tags
or 'streaming_service.suffix' in m.tags))):
match = match.initiator
if not seps_after(match):
if 'streaming_service.prefix' in match.tags:
next_match = matches.next(match, lambda m: m.name == 'streaming_service', 0)
if next_match and not matches.holes(match.end, next_match.start,
predicate=lambda m: m.value.strip(seps)):
continue
if match.children:
to_remove.extend(match.children)
to_remove.append(match)
elif not seps_before(match):
if 'streaming_service.suffix' in match.tags:
previous_match = matches.previous(match, lambda m: m.name == 'streaming_service', 0)
if previous_match and not matches.holes(previous_match.end, match.start,
predicate=lambda m: m.value.strip(seps)):
continue
if match.children:
to_remove.extend(match.children)
to_remove.append(match)
return to_remove
class ValidateAtEnd(Rule):
"""Validate other which should occur at the end of a filepart."""
priority = 32
consequence = RemoveMatch
def when(self, matches, context):
to_remove = []
for filepart in matches.markers.named('path'):
for match in matches.range(filepart.start, filepart.end,
predicate=lambda m: m.name == 'other' and 'at-end' in m.tags):
if (matches.holes(match.end, filepart.end, predicate=lambda m: m.value.strip(seps)) or
matches.range(match.end, filepart.end, predicate=lambda m: m.name not in (
'other', 'container'))):
to_remove.append(match)
return to_remove

View file

@ -7,20 +7,25 @@ from rebulk.remodule import re
from rebulk import Rebulk
from ..common import dash
from ..common.pattern import is_disabled
from ..common.validators import seps_surround, int_coercable, compose
from ..common.numeral import numeral, parse_numeral
from ...reutils import build_or_pattern
def part():
def part(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash], validator={'__parent__': seps_surround})
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'part'))
rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash], validator={'__parent__': seps_surround})
prefixes = ['pt', 'part']
prefixes = config['prefixes']
def validate_roman(match):
"""

View file

@ -5,84 +5,195 @@ release_group property
"""
import copy
from rebulk.remodule import re
from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch
from rebulk.match import Match
from rebulk import Rebulk, Rule, AppendMatch
from ..common.validators import int_coercable
from ..properties.title import TitleFromPosition
from ..common.formatters import cleanup
from ..common import seps, dash
from ..common import seps
from ..common.expected import build_expected_function
from ..common.comparators import marker_sorted
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import int_coercable, seps_surround
from ..properties.title import TitleFromPosition
def release_group():
def release_group(config):
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
return Rebulk().rules(SceneReleaseGroup, AnimeReleaseGroup, ExpectedReleaseGroup)
forbidden_groupnames = config['forbidden_names']
groupname_ignore_seps = config['ignored_seps']
groupname_seps = ''.join([c for c in seps if c not in groupname_ignore_seps])
def clean_groupname(string):
"""
        Remove and strip separators from the input string
:param string:
:type string:
:return:
:rtype:
"""
string = string.strip(groupname_seps)
if not (string.endswith(tuple(groupname_ignore_seps)) and string.startswith(tuple(groupname_ignore_seps))) \
and not any(i in string.strip(groupname_ignore_seps) for i in groupname_ignore_seps):
string = string.strip(groupname_ignore_seps)
for forbidden in forbidden_groupnames:
if string.lower().startswith(forbidden) and string[len(forbidden):len(forbidden) + 1] in seps:
string = string[len(forbidden):]
string = string.strip(groupname_seps)
if string.lower().endswith(forbidden) and string[-len(forbidden) - 1:-len(forbidden)] in seps:
string = string[:len(forbidden)]
string = string.strip(groupname_seps)
return string
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'release_group'))
expected_group = build_expected_function('expected_group')
rebulk.functional(expected_group, name='release_group', tags=['expected'],
validator=seps_surround,
conflict_solver=lambda match, other: other,
disabled=lambda context: not context.get('expected_group'))
return rebulk.rules(
DashSeparatedReleaseGroup(clean_groupname),
SceneReleaseGroup(clean_groupname),
AnimeReleaseGroup
)
forbidden_groupnames = ['rip', 'by', 'for', 'par', 'pour', 'bonus']
groupname_ignore_seps = '[]{}()'
groupname_seps = ''.join([c for c in seps if c not in groupname_ignore_seps])
def clean_groupname(string):
"""
    Remove and strip separators from the input string
:param input_string:
:type input_string:
:return:
:rtype:
"""
string = string.strip(groupname_seps)
if not (string.endswith(tuple(groupname_ignore_seps)) and string.startswith(tuple(groupname_ignore_seps)))\
and not any(i in string.strip(groupname_ignore_seps) for i in groupname_ignore_seps):
string = string.strip(groupname_ignore_seps)
for forbidden in forbidden_groupnames:
if string.lower().startswith(forbidden):
string = string[len(forbidden):]
string = string.strip(groupname_seps)
if string.lower().endswith(forbidden):
string = string[:len(forbidden)]
string = string.strip(groupname_seps)
return string
_scene_previous_names = ['video_codec', 'format', 'video_api', 'audio_codec', 'audio_profile', 'video_profile',
_scene_previous_names = ('video_codec', 'source', 'video_api', 'audio_codec', 'audio_profile', 'video_profile',
'audio_channels', 'screen_size', 'other', 'container', 'language', 'subtitle_language',
'subtitle_language.suffix', 'subtitle_language.prefix']
'subtitle_language.suffix', 'subtitle_language.prefix', 'language.suffix')
_scene_previous_tags = ['release-group-prefix']
_scene_previous_tags = ('release-group-prefix', )
class ExpectedReleaseGroup(Rule):
    """
    Add release_group match from expected_group option
    """
    consequence = AppendMatch
    properties = {'release_group': [None]}
    def enabled(self, context):
        return context.get('expected_group')
    def when(self, matches, context):
        expected_rebulk = Rebulk().defaults(name='release_group')
        for expected_group in context.get('expected_group'):
            if expected_group.startswith('re:'):
                expected_group = expected_group[3:]
                expected_group = expected_group.replace(' ', '-')
                expected_rebulk.regex(expected_group, abbreviations=[dash], flags=re.IGNORECASE)
            else:
                expected_rebulk.string(expected_group, ignore_case=True)
        matches = expected_rebulk.matches(matches.input_string, context)
        return matches
class DashSeparatedReleaseGroup(Rule):
    """
    Detect dash separated release groups that might appear at the end or at the beginning of a release name.
    Series.S01E02.Pilot.DVDRip.x264-CS.mkv
    release_group: CS
    abc-the.title.name.1983.1080p.bluray.x264.mkv
    release_group: abc
    At the end: Release groups should be dash-separated and shouldn't contain spaces nor
    appear in a group with other matches. The preceding matches should be separated by dot.
    If a release group is found, the conflicting matches are removed.
    At the beginning: Release groups should be dash-separated and shouldn't contain spaces nor appear in a group.
    It should be followed by a hole with dot-separated words.
    Detection only happens if no matches exist at the beginning.
    """
    consequence = [RemoveMatch, AppendMatch]
    properties = {'release_group': [None]}
    def __init__(self, value_formatter):
        """Default constructor."""
        super(DashSeparatedReleaseGroup, self).__init__()
        self.value_formatter = value_formatter
    @classmethod
    def is_valid(cls, matches, candidate, start, end, at_end):  # pylint:disable=inconsistent-return-statements
        """
        Whether a candidate is a valid release group.
        """
        if not at_end:
            if len(candidate.value) <= 1:
                return False
            if matches.markers.at_match(candidate, predicate=lambda m: m.name == 'group'):
                return False
            first_hole = matches.holes(candidate.end, end, predicate=lambda m: m.start == candidate.end, index=0)
            if not first_hole:
                return False
            raw_value = first_hole.raw
            return raw_value[0] == '-' and '-' not in raw_value[1:] and '.' in raw_value and ' ' not in raw_value
        group = matches.markers.at_match(candidate, predicate=lambda m: m.name == 'group', index=0)
        if group and matches.at_match(group, predicate=lambda m: not m.private and m.span != candidate.span):
            return False
        count = 0
        match = candidate
        while match:
            current = matches.range(start,
                                    match.start,
                                    index=-1,
                                    predicate=lambda m: not m.private and not 'expected' in m.tags)
            if not current:
                break
            separator = match.input_string[current.end:match.start]
            if not separator and match.raw[0] == '-':
                separator = '-'
            match = current
            if count == 0:
                if separator != '-':
                    break
                count += 1
                continue
            if separator == '.':
                return True
    def detect(self, matches, start, end, at_end):  # pylint:disable=inconsistent-return-statements
        """
        Detect release group at the end or at the beginning of a filepart.
        """
        candidate = None
        if at_end:
            container = matches.ending(end, lambda m: m.name == 'container', index=0)
            if container:
                end = container.start
            candidate = matches.ending(end, index=0, predicate=(
                lambda m: not m.private and not (
                    m.name == 'other' and 'not-a-release-group' in m.tags
                ) and '-' not in m.raw and m.raw.strip() == m.raw))
        if not candidate:
            if at_end:
                candidate = matches.holes(start, end, seps=seps, index=-1,
                                          predicate=lambda m: m.end == end and m.raw.strip(seps) and m.raw[0] == '-')
            else:
                candidate = matches.holes(start, end, seps=seps, index=0,
                                          predicate=lambda m: m.start == start and m.raw.strip(seps))
        if candidate and self.is_valid(matches, candidate, start, end, at_end):
            return candidate
def when(self, matches, context): # pylint:disable=inconsistent-return-statements
if matches.named('release_group'):
return
to_remove = []
to_append = []
for filepart in matches.markers.named('path'):
candidate = self.detect(matches, filepart.start, filepart.end, True)
if candidate:
to_remove.extend(matches.at_match(candidate))
else:
candidate = self.detect(matches, filepart.start, filepart.end, False)
if candidate:
releasegroup = Match(candidate.start, candidate.end, name='release_group',
formatter=self.value_formatter, input_string=candidate.input_string)
if releasegroup.value:
to_append.append(releasegroup)
return to_remove, to_append
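The docstring examples above translate directly to the public API (sketch, not verified against this exact build):

from guessit import guessit

print(guessit('Series.S01E02.Pilot.DVDRip.x264-CS.mkv'))
# expected to include {'release_group': 'CS', ...}
print(guessit('abc-the.title.name.1983.1080p.bluray.x264.mkv'))
# expected to include {'release_group': 'abc', ...}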
class SceneReleaseGroup(Rule):
@ -91,26 +202,61 @@ class SceneReleaseGroup(Rule):
Something.XViD-ReleaseGroup.mkv
"""
dependency = [TitleFromPosition, ExpectedReleaseGroup]
dependency = [TitleFromPosition]
consequence = AppendMatch
properties = {'release_group': [None]}
def when(self, matches, context):
def __init__(self, value_formatter):
"""Default constructor."""
super(SceneReleaseGroup, self).__init__()
self.value_formatter = value_formatter
def when(self, matches, context): # pylint:disable=too-many-locals
# If a release_group is found before, ignore this kind of release_group rule.
ret = []
for filepart in marker_sorted(matches.markers.named('path'), matches):
# pylint:disable=cell-var-from-loop
start, end = filepart.span
if matches.named('release_group', predicate=lambda m: m.start >= start and m.end <= end):
continue
last_hole = matches.holes(start, end + 1, formatter=clean_groupname,
titles = matches.named('title', predicate=lambda m: m.start >= start and m.end <= end)
def keep_only_first_title(match):
"""
Keep only first title from this filepart, as other ones are most likely release group.
:param match:
:type match:
:return:
:rtype:
"""
return match in titles[1:]
last_hole = matches.holes(start, end + 1, formatter=self.value_formatter,
ignore=keep_only_first_title,
predicate=lambda hole: cleanup(hole.value), index=-1)
if last_hole:
def previous_match_filter(match):
"""
Filter to apply to find previous match
:param match:
:type match:
:return:
:rtype:
"""
if match.start < filepart.start:
return False
return not match.private or match.name in _scene_previous_names
previous_match = matches.previous(last_hole,
lambda match: not match.private or
match.name in _scene_previous_names,
previous_match_filter,
index=0)
if previous_match and (previous_match.name in _scene_previous_names or
any(tag in previous_match.tags for tag in _scene_previous_tags)) and \
@ -123,12 +269,17 @@ class SceneReleaseGroup(Rule):
# if hole is inside a group marker with same value, remove [](){} ...
group = matches.markers.at_match(last_hole, lambda marker: marker.name == 'group', 0)
if group:
group.formatter = clean_groupname
group.formatter = self.value_formatter
if group.value == last_hole.value:
last_hole.start = group.start + 1
last_hole.end = group.end - 1
last_hole.tags = ['anime']
ignored_matches = matches.range(last_hole.start, last_hole.end, keep_only_first_title)
for ignored_match in ignored_matches:
matches.remove(ignored_match)
ret.append(last_hole)
return ret
@ -139,33 +290,42 @@ class AnimeReleaseGroup(Rule):
...[ReleaseGroup] Something.mkv
"""
dependency = [SceneReleaseGroup, TitleFromPosition]
consequence = AppendMatch
consequence = [RemoveMatch, AppendMatch]
properties = {'release_group': [None]}
def when(self, matches, context):
ret = []
to_remove = []
to_append = []
# If a release_group is found before, ignore this kind of release_group rule.
if matches.named('release_group'):
return to_remove, to_append
if not matches.named('episode') and not matches.named('season') and matches.named('release_group'):
            # This doesn't seem to be an anime
return
            # This doesn't seem to be an anime, and we already found another release_group.
return to_remove, to_append
for filepart in marker_sorted(matches.markers.named('path'), matches):
# pylint:disable=bad-continuation
empty_group_marker = matches.markers \
.range(filepart.start, filepart.end, lambda marker: marker.name == 'group'
and not matches.range(marker.start, marker.end)
and not int_coercable(marker.value.strip(seps)),
0)
empty_group = matches.markers.range(filepart.start,
filepart.end,
lambda marker: (marker.name == 'group'
and not matches.range(marker.start, marker.end,
lambda m:
'weak-language' not in m.tags)
and marker.value.strip(seps)
and not int_coercable(marker.value.strip(seps))), 0)
if empty_group_marker:
group = copy.copy(empty_group_marker)
if empty_group:
group = copy.copy(empty_group)
group.marker = False
group.raw_start += 1
group.raw_end -= 1
group.tags = ['anime']
group.name = 'release_group'
ret.append(group)
return ret
to_append.append(group)
to_remove.extend(matches.range(empty_group.start, empty_group.end,
lambda m: 'weak-language' in m.tags))
return to_remove, to_append
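A sketch of the anime case handled above (group name invented):

from guessit import guessit

# An otherwise-empty bracket group is promoted to the release group.
print(guessit('[SubGroup] Show Name - 01 [720p].mkv'))
# expected to include {'release_group': 'SubGroup', ...}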

View file

@ -3,66 +3,115 @@
"""
screen_size property
"""
from rebulk.match import Match
from rebulk.remodule import re
from rebulk import Rebulk, Rule, RemoveMatch
from rebulk import Rebulk, Rule, RemoveMatch, AppendMatch
from ..common.pattern import is_disabled
from ..common.quantity import FrameRate
from ..common.validators import seps_surround
from ..common import dash
from ..common import dash, seps
from ...reutils import build_or_pattern
def screen_size():
def screen_size(config):
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
def conflict_solver(match, other):
"""
Conflict solver for most screen_size.
"""
if other.name == 'screen_size':
if 'resolution' in other.tags:
            # Hack to solve the conflict in "720 x 432"-style strings matching both the 720p pattern
int_value = _digits_re.findall(match.raw)[-1]
if other.value.startswith(int_value):
return match
return other
return '__default__'
interlaced = frozenset({res for res in config['interlaced']})
progressive = frozenset({res for res in config['progressive']})
frame_rates = [re.escape(rate) for rate in config['frame_rates']]
min_ar = config['min_ar']
max_ar = config['max_ar']
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE)
rebulk.defaults(name="screen_size", validator=seps_surround, conflict_solver=conflict_solver)
rebulk = Rebulk()
rebulk = rebulk.string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE)
rebulk.regex(r"(?:\d{3,}(?:x|\*))?360(?:i|p?x?)", value="360p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?368(?:i|p?x?)", value="368p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?480(?:i|p?x?)", value="480p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?576(?:i|p?x?)", value="576p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?720(?:i|p?(?:50|60)?x?)", value="720p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?720(?:p(?:50|60)?x?)", value="720p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?720p?hd", value="720p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?900(?:i|p?x?)", value="900p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?1080i", value="1080i")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?1080p?x?", value="1080p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?1080(?:p(?:50|60)?x?)", value="1080p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?1080p?hd", value="1080p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?2160(?:i|p?x?)", value="4K")
rebulk.defaults(name='screen_size', validator=seps_surround, abbreviations=[dash],
disabled=lambda context: is_disabled(context, 'screen_size'))
_digits_re = re.compile(r'\d+')
frame_rate_pattern = build_or_pattern(frame_rates, name='frame_rate')
interlaced_pattern = build_or_pattern(interlaced, name='height')
progressive_pattern = build_or_pattern(progressive, name='height')
rebulk.defaults(name="screen_size", validator=seps_surround)
    rebulk.regex(r'\d{3,}-?(?:x|\*)-?\d{3,}',
                 formatter=lambda value: 'x'.join(_digits_re.findall(value)),
                 abbreviations=[dash],
                 tags=['resolution'],
                 conflict_solver=lambda match, other: '__default__' if other.name == 'screen_size' else other)
    rebulk.rules(ScreenSizeOnlyOne)
    res_pattern = r'(?:(?P<width>\d{3,4})(?:x|\*))?'
    rebulk.regex(res_pattern + interlaced_pattern + r'(?P<scan_type>i)' + frame_rate_pattern + '?')
    rebulk.regex(res_pattern + progressive_pattern + r'(?P<scan_type>p)' + frame_rate_pattern + '?')
    rebulk.regex(res_pattern + progressive_pattern + r'(?P<scan_type>p)?(?:hd)')
    rebulk.regex(res_pattern + progressive_pattern + r'(?P<scan_type>p)?x?')
    rebulk.string('4k', value='2160p')
    rebulk.regex(r'(?P<width>\d{3,4})-?(?:x|\*)-?(?P<height>\d{3,4})',
                 conflict_solver=lambda match, other: '__default__' if other.name == 'screen_size' else other)
    rebulk.regex(frame_rate_pattern + '(p|fps)', name='frame_rate',
                 formatter=FrameRate.fromstring, disabled=lambda context: is_disabled(context, 'frame_rate'))
    rebulk.rules(PostProcessScreenSize(progressive, min_ar, max_ar), ScreenSizeOnlyOne, ResolveScreenSizeConflicts)
return rebulk
class PostProcessScreenSize(Rule):
"""
Process the screen size calculating the aspect ratio if available.
Convert to a standard notation (720p, 1080p, etc) when it's a standard resolution and
aspect ratio is valid or not available.
It also creates an aspect_ratio match when available.
"""
consequence = AppendMatch
def __init__(self, standard_heights, min_ar, max_ar):
super(PostProcessScreenSize, self).__init__()
self.standard_heights = standard_heights
self.min_ar = min_ar
self.max_ar = max_ar
def when(self, matches, context):
to_append = []
for match in matches.named('screen_size'):
if not is_disabled(context, 'frame_rate'):
for frame_rate in match.children.named('frame_rate'):
frame_rate.formatter = FrameRate.fromstring
to_append.append(frame_rate)
values = match.children.to_dict()
if 'height' not in values:
continue
scan_type = (values.get('scan_type') or 'p').lower()
height = values['height']
if 'width' not in values:
match.value = '{0}{1}'.format(height, scan_type)
continue
width = values['width']
calculated_ar = float(width) / float(height)
aspect_ratio = Match(match.start, match.end, input_string=match.input_string,
name='aspect_ratio', value=round(calculated_ar, 3))
if not is_disabled(context, 'aspect_ratio'):
to_append.append(aspect_ratio)
if height in self.standard_heights and self.min_ar < calculated_ar < self.max_ar:
match.value = '{0}{1}'.format(height, scan_type)
else:
match.value = '{0}x{1}'.format(width, height)
return to_append
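A sketch of the width x height handling above (1.778 follows from round(1920/1080, 3); the min/max aspect-ratio bounds come from configuration):

from guessit import guessit

# 1920x1080 is a standard height with a valid aspect ratio, so it is
# normalised to '1080p' and an aspect_ratio match is appended.
print(guessit('Movie.2018.1920x1080.x264.mkv'))
# expected to include {'screen_size': '1080p', 'aspect_ratio': 1.778, ...}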
class ScreenSizeOnlyOne(Rule):
"""
    Keep a single screen_size per filepath part.
"""
consequence = RemoveMatch
@ -71,7 +120,44 @@ class ScreenSizeOnlyOne(Rule):
for filepart in matches.markers.named('path'):
screensize = list(reversed(matches.range(filepart.start, filepart.end,
lambda match: match.name == 'screen_size')))
if len(screensize) > 1:
if len(screensize) > 1 and len(set((match.value for match in screensize))) > 1:
to_remove.extend(screensize[1:])
return to_remove
class ResolveScreenSizeConflicts(Rule):
"""
Resolve screen_size conflicts with season and episode matches.
"""
consequence = RemoveMatch
def when(self, matches, context):
to_remove = []
for filepart in matches.markers.named('path'):
screensize = matches.range(filepart.start, filepart.end, lambda match: match.name == 'screen_size', 0)
if not screensize:
continue
conflicts = matches.conflicting(screensize, lambda match: match.name in ('season', 'episode'))
if not conflicts:
continue
has_neighbor = False
video_profile = matches.range(screensize.end, filepart.end, lambda match: match.name == 'video_profile', 0)
if video_profile and not matches.holes(screensize.end, video_profile.start,
predicate=lambda h: h.value and h.value.strip(seps)):
to_remove.extend(conflicts)
has_neighbor = True
previous = matches.previous(screensize, index=0, predicate=(
lambda m: m.name in ('date', 'source', 'other', 'streaming_service')))
if previous and not matches.holes(previous.end, screensize.start,
predicate=lambda h: h.value and h.value.strip(seps)):
to_remove.extend(conflicts)
has_neighbor = True
if not has_neighbor:
to_remove.append(screensize)
return to_remove

View file

@ -0,0 +1,30 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
size property
"""
import re
from rebulk import Rebulk
from ..common import dash
from ..common.quantity import Size
from ..common.pattern import is_disabled
from ..common.validators import seps_surround
def size(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'size'))
rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
rebulk.defaults(name='size', validator=seps_surround)
rebulk.regex(r'\d+-?[mgt]b', r'\d+\.\d+-?[mgt]b', formatter=Size.fromstring, tags=['release-group-prefix'])
return rebulk
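A sketch of the new size property (filename invented; the value is parsed into a Size quantity by Size.fromstring):

from guessit import guessit

print(guessit('Movie.2018.1080p.BluRay.700MB.mkv'))
# expected to include a size quantity, e.g. {'size': <Size 700MB>, ...}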

View file

@ -0,0 +1,201 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
source property
"""
import copy
from rebulk.remodule import re
from rebulk import AppendMatch, Rebulk, RemoveMatch, Rule
from .audio_codec import HqConflictRule
from ..common import dash, seps
from ..common.pattern import is_disabled
from ..common.validators import seps_before, seps_after
def source(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'source'))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash], private_parent=True, children=True)
rebulk.defaults(name='source', tags=['video-codec-prefix', 'streaming_service.suffix'])
rip_prefix = '(?P<other>Rip)-?'
rip_suffix = '-?(?P<other>Rip)'
rip_optional_suffix = '(?:' + rip_suffix + ')?'
def build_source_pattern(*patterns, **kwargs):
"""Helper pattern to build source pattern."""
prefix_format = kwargs.get('prefix') or ''
suffix_format = kwargs.get('suffix') or ''
string_format = prefix_format + '({0})' + suffix_format
return [string_format.format(pattern) for pattern in patterns]
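    # Illustration (not part of the original file): with the helpers above,
    #   build_source_pattern('VHS', suffix=rip_optional_suffix)
    # expands to ['(VHS)(?:-?(?P<other>Rip))?'], so a trailing 'Rip' is
    # captured as a companion 'other' match next to the 'source' value.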
def demote_other(match, other): # pylint: disable=unused-argument
"""Default conflict solver with 'other' property."""
return other if other.name == 'other' else '__default__'
rebulk.regex(*build_source_pattern('VHS', suffix=rip_optional_suffix),
value={'source': 'VHS', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('CAM', suffix=rip_optional_suffix),
value={'source': 'Camera', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('HD-?CAM', suffix=rip_optional_suffix),
value={'source': 'HD Camera', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('TELESYNC', 'TS', suffix=rip_optional_suffix),
value={'source': 'Telesync', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('HD-?TELESYNC', 'HD-?TS', suffix=rip_optional_suffix),
value={'source': 'HD Telesync', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('WORKPRINT', 'WP'), value='Workprint')
rebulk.regex(*build_source_pattern('TELECINE', 'TC', suffix=rip_optional_suffix),
value={'source': 'Telecine', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('HD-?TELECINE', 'HD-?TC', suffix=rip_optional_suffix),
value={'source': 'HD Telecine', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('PPV', suffix=rip_optional_suffix),
value={'source': 'Pay-per-view', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('SD-?TV', suffix=rip_optional_suffix),
value={'source': 'TV', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('TV', suffix=rip_suffix), # TV is too common to allow matching
value={'source': 'TV', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('TV', 'SD-?TV', prefix=rip_prefix),
value={'source': 'TV', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('TV-?(?=Dub)'), value='TV')
rebulk.regex(*build_source_pattern('DVB', 'PD-?TV', suffix=rip_optional_suffix),
value={'source': 'Digital TV', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('DVD', suffix=rip_optional_suffix),
value={'source': 'DVD', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('DM', suffix=rip_optional_suffix),
value={'source': 'Digital Master', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('VIDEO-?TS', 'DVD-?R(?:$|(?!E))', # 'DVD-?R(?:$|^E)' => DVD-Real ...
'DVD-?9', 'DVD-?5'), value='DVD')
rebulk.regex(*build_source_pattern('HD-?TV', suffix=rip_optional_suffix), conflict_solver=demote_other,
value={'source': 'HDTV', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('TV-?HD', suffix=rip_suffix), conflict_solver=demote_other,
value={'source': 'HDTV', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('TV', suffix='-?(?P<other>Rip-?HD)'), conflict_solver=demote_other,
value={'source': 'HDTV', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('VOD', suffix=rip_optional_suffix),
value={'source': 'Video on Demand', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('WEB', 'WEB-?DL', suffix=rip_suffix),
value={'source': 'Web', 'other': 'Rip'})
    # WEBCap is a synonym for WEBRip, mostly used by non-English speakers
rebulk.regex(*build_source_pattern('WEB-?(?P<another>Cap)', suffix=rip_optional_suffix),
value={'source': 'Web', 'other': 'Rip', 'another': 'Rip'})
rebulk.regex(*build_source_pattern('WEB-?DL', 'WEB-?U?HD', 'WEB', 'DL-?WEB', 'DL(?=-?Mux)'),
value={'source': 'Web'})
rebulk.regex(*build_source_pattern('HD-?DVD', suffix=rip_optional_suffix),
value={'source': 'HD-DVD', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('Blu-?ray', 'BD', 'BD[59]', 'BD25', 'BD50', suffix=rip_optional_suffix),
value={'source': 'Blu-ray', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('(?P<another>BR)-?(?=Scr(?:eener)?)', '(?P<another>BR)-?(?=Mux)'), # BRRip
value={'source': 'Blu-ray', 'another': 'Reencoded'})
rebulk.regex(*build_source_pattern('(?P<another>BR)', suffix=rip_suffix), # BRRip
value={'source': 'Blu-ray', 'other': 'Rip', 'another': 'Reencoded'})
rebulk.regex(*build_source_pattern('Ultra-?Blu-?ray', 'Blu-?ray-?Ultra'), value='Ultra HD Blu-ray')
rebulk.regex(*build_source_pattern('AHDTV'), value='Analog HDTV')
rebulk.regex(*build_source_pattern('UHD-?TV', suffix=rip_optional_suffix), conflict_solver=demote_other,
value={'source': 'Ultra HDTV', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('UHD', suffix=rip_suffix), conflict_solver=demote_other,
value={'source': 'Ultra HDTV', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('DSR', 'DTH', suffix=rip_optional_suffix),
value={'source': 'Satellite', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('DSR?', 'SAT', suffix=rip_suffix),
value={'source': 'Satellite', 'other': 'Rip'})
rebulk.rules(ValidateSource, UltraHdBlurayRule)
return rebulk
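# A minimal usage sketch (assuming the public guessit() entry point; the
# expected values mirror the source test data further down in this diff):
#
#     >>> from guessit import guessit
#     >>> guessit('Movie.Title.2012.BRRip.XviD')['source']
#     'Blu-ray'
#     >>> guessit('Movie.Title.2012.BRRip.XviD')['other']
#     ['Reencoded', 'Rip']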
class UltraHdBlurayRule(Rule):
"""
Replace other:Ultra HD and source:Blu-ray with source:Ultra HD Blu-ray
"""
dependency = HqConflictRule
consequence = [RemoveMatch, AppendMatch]
@classmethod
def find_ultrahd(cls, matches, start, end, index):
"""Find Ultra HD match."""
return matches.range(start, end, index=index, predicate=(
lambda m: not m.private and m.name == 'other' and m.value == 'Ultra HD'
))
@classmethod
def validate_range(cls, matches, start, end):
"""Validate no holes or invalid matches exist in the specified range."""
return (
not matches.holes(start, end, predicate=lambda m: m.value.strip(seps)) and
not matches.range(start, end, predicate=(
lambda m: not m.private and (
m.name not in ('screen_size', 'color_depth') and (
m.name != 'other' or 'uhdbluray-neighbor' not in m.tags))))
)
def when(self, matches, context):
to_remove = []
to_append = []
for filepart in matches.markers.named('path'):
for match in matches.range(filepart.start, filepart.end, predicate=(
lambda m: not m.private and m.name == 'source' and m.value == 'Blu-ray')):
other = self.find_ultrahd(matches, filepart.start, match.start, -1)
if not other or not self.validate_range(matches, other.end, match.start):
other = self.find_ultrahd(matches, match.end, filepart.end, 0)
if not other or not self.validate_range(matches, match.end, other.start):
if not matches.range(filepart.start, filepart.end, predicate=(
lambda m: m.name == 'screen_size' and m.value == '2160p')):
continue
if other:
other.private = True
new_source = copy.copy(match)
new_source.value = 'Ultra HD Blu-ray'
to_remove.append(match)
to_append.append(new_source)
return to_remove, to_append
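# A hedged sketch of this rule's effect (expected value taken from the source
# test data further down in this diff): a Blu-ray source next to an Ultra HD
# neighbor, or alongside a 2160p screen size, is upgraded:
#
#     >>> from guessit import guessit
#     >>> guessit('UHD.2160p.BRRip')['source']
#     'Ultra HD Blu-ray'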
class ValidateSource(Rule):
"""
Validate that a source match is separated from its neighbors, or adjacent to a match tagged 'source-prefix'/'source-suffix' (e.g. a screener or video_codec match)
"""
priority = 64
consequence = RemoveMatch
def when(self, matches, context):
ret = []
for match in matches.named('source'):
match = match.initiator
if not seps_before(match) and \
not matches.range(match.start - 1, match.start - 2,
lambda m: 'source-prefix' in m.tags):
if match.children:
ret.extend(match.children)
ret.append(match)
continue
if not seps_after(match) and \
not matches.range(match.end, match.end + 1,
lambda m: 'source-suffix' in m.tags):
if match.children:
ret.extend(match.children)
ret.append(match)
continue
return ret
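# A sketch of the separator validation above (grounded in the '-SomeVHS stuff'
# case of the source test data): a source token glued to surrounding text,
# with no separator and no source-prefix/source-suffix neighbor, is dropped:
#
#     >>> from guessit import guessit
#     >>> 'source' in guessit('SomeVHS stuff')
#     False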

View file

@ -0,0 +1,198 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
streaming_service property
"""
import re
from rebulk import Rebulk
from rebulk.rules import Rule, RemoveMatch
from ..common.pattern import is_disabled
from ...rules.common import seps, dash
from ...rules.common.validators import seps_before, seps_after
def streaming_service(config): # pylint: disable=too-many-statements,unused-argument
"""Streaming service property.
:param config: rule configuration
:type config: dict
:return:
:rtype: Rebulk
"""
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'streaming_service'))
rebulk = rebulk.string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
rebulk.defaults(name='streaming_service', tags=['source-prefix'])
rebulk.string('AE', 'A&E', value='A&E')
rebulk.string('AMBC', value='ABC')
rebulk.string('AUBC', value='ABC Australia')
rebulk.string('AJAZ', value='Al Jazeera English')
rebulk.string('AMC', value='AMC')
rebulk.string('AMZN', 'Amazon', value='Amazon Prime')
rebulk.regex('Amazon-?Prime', value='Amazon Prime')
rebulk.string('AS', value='Adult Swim')
rebulk.regex('Adult-?Swim', value='Adult Swim')
rebulk.string('ATK', value="America's Test Kitchen")
rebulk.string('ANPL', value='Animal Planet')
rebulk.string('ANLB', value='AnimeLab')
rebulk.string('AOL', value='AOL')
rebulk.string('ARD', value='ARD')
rebulk.string('iP', value='BBC iPlayer')
rebulk.regex('BBC-?iPlayer', value='BBC iPlayer')
rebulk.string('BRAV', value='BravoTV')
rebulk.string('CNLP', value='Canal+')
rebulk.string('CN', value='Cartoon Network')
rebulk.string('CBC', value='CBC')
rebulk.string('CBS', value='CBS')
rebulk.string('CNBC', value='CNBC')
rebulk.string('CC', value='Comedy Central')
rebulk.string('4OD', value='Channel 4')
rebulk.string('CHGD', value='CHRGD')
rebulk.string('CMAX', value='Cinemax')
rebulk.string('CMT', value='Country Music Television')
rebulk.regex('Comedy-?Central', value='Comedy Central')
rebulk.string('CCGC', value='Comedians in Cars Getting Coffee')
rebulk.string('CR', value='Crunchy Roll')
rebulk.string('CRKL', value='Crackle')
rebulk.regex('Crunchy-?Roll', value='Crunchy Roll')
rebulk.string('CSPN', value='CSpan')
rebulk.string('CTV', value='CTV')
rebulk.string('CUR', value='CuriosityStream')
rebulk.string('CWS', value='CWSeed')
rebulk.string('DSKI', value='Daisuki')
rebulk.string('DHF', value='Deadhouse Films')
rebulk.string('DDY', value='Digiturk Diledigin Yerde')
rebulk.string('DISC', 'Discovery', value='Discovery')
rebulk.string('DSNY', 'Disney', value='Disney')
rebulk.string('DIY', value='DIY Network')
rebulk.string('DOCC', value='Doc Club')
rebulk.string('DPLY', value='DPlay')
rebulk.string('ETV', value='E!')
rebulk.string('EPIX', value='ePix')
rebulk.string('ETTV', value='El Trece')
rebulk.string('ESPN', value='ESPN')
rebulk.string('ESQ', value='Esquire')
rebulk.string('FAM', value='Family')
rebulk.string('FJR', value='Family Jr')
rebulk.string('FOOD', value='Food Network')
rebulk.string('FOX', value='Fox')
rebulk.string('FREE', value='Freeform')
rebulk.string('FYI', value='FYI Network')
rebulk.string('GLBL', value='Global')
rebulk.string('GLOB', value='GloboSat Play')
rebulk.string('HLMK', value='Hallmark')
rebulk.string('HBO', value='HBO Go')
rebulk.regex('HBO-?Go', value='HBO Go')
rebulk.string('HGTV', value='HGTV')
rebulk.string('HIST', 'History', value='History')
rebulk.string('HULU', value='Hulu')
rebulk.string('ID', value='Investigation Discovery')
rebulk.string('IFC', value='IFC')
rebulk.string('iTunes', 'iT', value='iTunes')
rebulk.string('ITV', value='ITV')
rebulk.string('KNOW', value='Knowledge Network')
rebulk.string('LIFE', value='Lifetime')
rebulk.string('MTOD', value='Motor Trend OnDemand')
rebulk.string('MNBC', value='MSNBC')
rebulk.string('MTV', value='MTV')
rebulk.string('NATG', value='National Geographic')
rebulk.regex('National-?Geographic', value='National Geographic')
rebulk.string('NBA', value='NBA TV')
rebulk.regex('NBA-?TV', value='NBA TV')
rebulk.string('NBC', value='NBC')
rebulk.string('NF', 'Netflix', value='Netflix')
rebulk.string('NFL', value='NFL')
rebulk.string('NFLN', value='NFL Now')
rebulk.string('GC', value='NHL GameCenter')
rebulk.string('NICK', 'Nickelodeon', value='Nickelodeon')
rebulk.string('NRK', value='Norsk Rikskringkasting')
rebulk.string('PBS', value='PBS')
rebulk.string('PBSK', value='PBS Kids')
rebulk.string('PSN', value='Playstation Network')
rebulk.string('PLUZ', value='Pluzz')
rebulk.string('RTE', value='RTE One')
rebulk.string('SBS', value='SBS (AU)')
rebulk.string('SESO', 'SeeSo', value='SeeSo')
rebulk.string('SHMI', value='Shomi')
rebulk.string('SPIK', value='Spike')
rebulk.string('SPKE', value='Spike TV')
rebulk.regex('Spike-?TV', value='Spike TV')
rebulk.string('SNET', value='Sportsnet')
rebulk.string('SPRT', value='Sprout')
rebulk.string('STAN', value='Stan')
rebulk.string('STZ', value='Starz')
rebulk.string('SVT', value='Sveriges Television')
rebulk.string('SWER', value='SwearNet')
rebulk.string('SYFY', value='Syfy')
rebulk.string('TBS', value='TBS')
rebulk.string('TFOU', value='TFou')
rebulk.string('CW', value='The CW')
rebulk.regex('The-?CW', value='The CW')
rebulk.string('TLC', value='TLC')
rebulk.string('TUBI', value='TubiTV')
rebulk.string('TV3', value='TV3 Ireland')
rebulk.string('TV4', value='TV4 Sweden')
rebulk.string('TVL', value='TV Land')
rebulk.regex('TV-?Land', value='TV Land')
rebulk.string('UFC', value='UFC')
rebulk.string('UKTV', value='UKTV')
rebulk.string('UNIV', value='Univision')
rebulk.string('USAN', value='USA Network')
rebulk.string('VLCT', value='Velocity')
rebulk.string('VH1', value='VH1')
rebulk.string('VICE', value='Viceland')
rebulk.string('VMEO', value='Vimeo')
rebulk.string('VRV', value='VRV')
rebulk.string('WNET', value='W Network')
rebulk.string('WME', value='WatchMe')
rebulk.string('WWEN', value='WWE Network')
rebulk.string('XBOX', value='Xbox Video')
rebulk.string('YHOO', value='Yahoo')
rebulk.string('RED', value='YouTube Red')
rebulk.string('ZDF', value='ZDF')
rebulk.rules(ValidateStreamingService)
return rebulk
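# A minimal usage sketch (assuming the public guessit() entry point; the
# filename and expected value come from the streaming service test data
# further down in this diff):
#
#     >>> from guessit import guessit
#     >>> guessit('House.of.Cards.2013.S02E03.1080p.NF.WEBRip.DD5.1.x264-NTb.mkv')['streaming_service']
#     'Netflix'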
class ValidateStreamingService(Rule):
"""Validate streaming service matches."""
priority = 32
consequence = RemoveMatch
def when(self, matches, context):
"""Streaming service is always before source.
:param matches:
:type matches: rebulk.match.Matches
:param context:
:type context: dict
:return:
"""
to_remove = []
for service in matches.named('streaming_service'):
next_match = matches.next(service, lambda match: 'streaming_service.suffix' in match.tags, 0)
previous_match = matches.previous(service, lambda match: 'streaming_service.prefix' in match.tags, 0)
has_other = service.initiator and service.initiator.children.named('other')
if not has_other:
if (not next_match or
matches.holes(service.end, next_match.start,
predicate=lambda match: match.value.strip(seps)) or
not seps_before(service)):
if (not previous_match or
matches.holes(previous_match.end, service.start,
predicate=lambda match: match.value.strip(seps)) or
not seps_after(service)):
to_remove.append(service)
continue
if service.value == 'Comedy Central':
# Current match is a valid streaming service, removing invalid Criterion Collection (CC) matches
to_remove.extend(matches.named('edition', predicate=lambda match: match.value == 'Criterion'))
return to_remove

View file

@ -3,54 +3,37 @@
"""
title property
"""
import re
from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, AppendTags
from rebulk.formatters import formatters
from rebulk.pattern import RePattern
from rebulk.utils import find_all
from .film import FilmTitleRule
from .language import SubtitlePrefixLanguageRule, SubtitleSuffixLanguageRule, SubtitleExtensionRule
from ..common.formatters import cleanup, reorder_title
from ..common import seps, title_seps
from ..common.comparators import marker_sorted
from ..common import seps, title_seps, dash
from ..common.expected import build_expected_function
from ..common.formatters import cleanup, reorder_title
from ..common.pattern import is_disabled
from ..common.validators import seps_surround
def title():
def title(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().rules(TitleFromPosition, PreferTitleWithYear)
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'title'))
rebulk.rules(TitleFromPosition, PreferTitleWithYear)
def expected_title(input_string, context):
"""
Expected title functional pattern.
:param input_string:
:type input_string:
:param context:
:type context:
:return:
:rtype:
"""
ret = []
for search in context.get('expected_title'):
if search.startswith('re:'):
search = search[3:]
search = search.replace(' ', '-')
matches = RePattern(search, abbreviations=[dash], flags=re.IGNORECASE).matches(input_string, context)
for match in matches:
# Instance of 'list' has no 'span' member (no-member). Seems to be a pylint bug.
# pylint: disable=no-member
ret.append(match.span)
else:
for start in find_all(input_string, search, ignore_case=True):
ret.append((start, start+len(search)))
return ret
expected_title = build_expected_function('expected_title')
rebulk.functional(expected_title, name='title', tags=['expected'],
rebulk.functional(expected_title, name='title', tags=['expected', 'title'],
validator=seps_surround,
formatter=formatters(cleanup, reorder_title),
conflict_solver=lambda match, other: other,
disabled=lambda context: not context.get('expected_title'))
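# A hedged sketch of the expected-title mechanism (titles taken from the
# configuration fixtures further down in this diff; 're:'-prefixed entries
# are treated as regular expressions by the functional pattern):
#
#     >>> from guessit import guessit
#     >>> guessit('The.100.S01E01.720p', options={'expected_title': ['The 100']})['title']
#     'The 100'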
@ -116,7 +99,7 @@ class TitleBaseRule(Rule):
Full word language and countries won't be ignored if they are uppercase.
"""
return not (len(match) > 3 and match.raw.isupper()) and match.name in ['language', 'country', 'episode_details']
return not (len(match) > 3 and match.raw.isupper()) and match.name in ('language', 'country', 'episode_details')
def should_keep(self, match, to_keep, matches, filepart, hole, starting):
"""
@ -136,7 +119,7 @@ class TitleBaseRule(Rule):
:return:
:rtype:
"""
if match.name in ['language', 'country']:
if match.name in ('language', 'country'):
# Keep language if exactly matching the hole.
if len(hole.value) == len(match.raw):
return True
@ -149,7 +132,7 @@ class TitleBaseRule(Rule):
lambda c_match: c_match.name == match.name and
c_match not in to_keep))
if not other_languages:
if not other_languages and (not starting or len(match.raw) <= 3):
return True
return False
@ -164,10 +147,10 @@ class TitleBaseRule(Rule):
:return:
"""
if context.get('type') == 'episode' and match.name == 'episode_details':
return False
return match.start >= hole.start and match.end <= hole.end
return True
def check_titles_in_filepart(self, filepart, matches, context):
def check_titles_in_filepart(self, filepart, matches, context): # pylint:disable=inconsistent-return-statements
"""
Find title in filepart (ignoring language)
"""
@ -176,12 +159,11 @@ class TitleBaseRule(Rule):
holes = matches.holes(start, end + 1, formatter=formatters(cleanup, reorder_title),
ignore=self.is_ignored,
predicate=lambda hole: hole.value)
predicate=lambda m: m.value)
holes = self.holes_process(holes, matches)
for hole in holes:
# pylint:disable=cell-var-from-loop
if not hole or (self.hole_filter and not self.hole_filter(hole, matches)):
continue
@ -192,8 +174,8 @@ class TitleBaseRule(Rule):
if ignored_matches:
for ignored_match in reversed(ignored_matches):
# pylint:disable=undefined-loop-variable
trailing = matches.chain_before(hole.end, seps, predicate=lambda match: match == ignored_match)
# pylint:disable=undefined-loop-variable, cell-var-from-loop
trailing = matches.chain_before(hole.end, seps, predicate=lambda m: m == ignored_match)
if trailing:
should_keep = self.should_keep(ignored_match, to_keep, matches, filepart, hole, False)
if should_keep:
@ -210,7 +192,7 @@ class TitleBaseRule(Rule):
for ignored_match in ignored_matches:
if ignored_match not in to_keep:
starting = matches.chain_after(hole.start, seps,
predicate=lambda match: match == ignored_match)
predicate=lambda m: m == ignored_match)
if starting:
should_keep = self.should_keep(ignored_match, to_keep, matches, filepart, hole, True)
if should_keep:
@ -236,7 +218,7 @@ class TitleBaseRule(Rule):
hole.tags = self.match_tags
if self.alternative_match_name:
# Split and keep values that can be a title
titles = hole.split(title_seps, lambda match: match.value)
titles = hole.split(title_seps, lambda m: m.value)
for title_match in list(titles[1:]):
previous_title = titles[titles.index(title_match) - 1]
separator = matches.input_string[previous_title.end:title_match.start]
@ -253,14 +235,15 @@ class TitleBaseRule(Rule):
return titles, to_remove
def when(self, matches, context):
ret = []
to_remove = []
if matches.named(self.match_name, lambda match: 'expected' in match.tags):
return
return ret, to_remove
fileparts = [filepart for filepart in list(marker_sorted(matches.markers.named('path'), matches))
if not self.filepart_filter or self.filepart_filter(filepart, matches)]
to_remove = []
# Prioritize fileparts containing the year
years_fileparts = []
for filepart in fileparts:
@ -268,7 +251,6 @@ class TitleBaseRule(Rule):
if year_match:
years_fileparts.append(filepart)
ret = []
for filepart in fileparts:
try:
years_fileparts.remove(filepart)
@ -304,6 +286,9 @@ class TitleFromPosition(TitleBaseRule):
def __init__(self):
super(TitleFromPosition, self).__init__('title', ['title'], 'alternative_title')
def enabled(self, context):
return not is_disabled(context, 'alternative_title')
class PreferTitleWithYear(Rule):
"""
@ -324,7 +309,7 @@ class PreferTitleWithYear(Rule):
if filepart:
year_match = matches.range(filepart.start, filepart.end, lambda match: match.name == 'year', 0)
if year_match:
group = matches.markers.at_match(year_match, lambda group: group.name == 'group')
group = matches.markers.at_match(year_match, lambda m: m.name == 'group')
if group:
with_year_in_group.append(title_match)
else:
@ -332,13 +317,13 @@ class PreferTitleWithYear(Rule):
to_tag = []
if with_year_in_group:
title_values = set([title_match.value for title_match in with_year_in_group])
title_values = {title_match.value for title_match in with_year_in_group}
to_tag.extend(with_year_in_group)
elif with_year:
title_values = set([title_match.value for title_match in with_year])
title_values = {title_match.value for title_match in with_year}
to_tag.extend(with_year)
else:
title_values = set([title_match.value for title_match in titles])
title_values = {title_match.value for title_match in titles}
to_remove = []
for title_match in titles:

View file

@ -6,6 +6,7 @@ type property
from rebulk import CustomRule, Rebulk, POST_PROCESS
from rebulk.match import Match
from ..common.pattern import is_disabled
from ...rules.processors import Processors
@ -19,13 +20,19 @@ def _type(matches, value):
matches.append(Match(len(matches.input_string), len(matches.input_string), name='type', value=value))
def type_():
def type_(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
return Rebulk().rules(TypeProcessor)
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'type'))
rebulk = rebulk.rules(TypeProcessor)
return rebulk
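# A minimal sketch of the resulting behavior (input and expected value mirror
# the options test data further down in this diff); TypeProcessor below now
# also treats a lone absolute_episode match as an episode:
#
#     >>> from guessit import guessit
#     >>> guessit('series s02e01')['type']
#     'episode'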
class TypeProcessor(CustomRule):
@ -45,9 +52,10 @@ class TypeProcessor(CustomRule):
episode = matches.named('episode')
season = matches.named('season')
absolute_episode = matches.named('absolute_episode')
episode_details = matches.named('episode_details')
if episode or season or episode_details:
if episode or season or episode_details or absolute_episode:
return 'episode'
film = matches.named('film')

View file

@ -7,42 +7,71 @@ from rebulk.remodule import re
from rebulk import Rebulk, Rule, RemoveMatch
from guessit.rules.common.validators import seps_after, seps_before
from ..common import dash
from ..common.validators import seps_surround
from ..common.pattern import is_disabled
from ..common.validators import seps_after, seps_before, seps_surround
def video_codec():
def video_codec(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
rebulk.defaults(name="video_codec")
rebulk = Rebulk()
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
rebulk.defaults(name="video_codec",
tags=['source-suffix', 'streaming_service.suffix'],
disabled=lambda context: is_disabled(context, 'video_codec'))
rebulk.regex(r"Rv\d{2}", value="Real")
rebulk.regex("Mpeg2", value="Mpeg2")
rebulk.regex("DVDivX", "DivX", value="DivX")
rebulk.regex("XviD", value="XviD")
rebulk.regex("[hx]-?264(?:-?AVC(HD)?)?", "MPEG-?4(?:-?AVC(HD)?)", "AVCHD", value="h264")
rebulk.regex("[hx]-?265(?:-?HEVC)?", "HEVC", value="h265")
rebulk.regex(r'Rv\d{2}', value='RealVideo')
rebulk.regex('Mpe?g-?2', '[hx]-?262', value='MPEG-2')
rebulk.string("DVDivX", "DivX", value="DivX")
rebulk.string('XviD', value='Xvid')
rebulk.regex('VC-?1', value='VC-1')
rebulk.string('VP7', value='VP7')
rebulk.string('VP8', 'VP80', value='VP8')
rebulk.string('VP9', value='VP9')
rebulk.regex('[hx]-?263', value='H.263')
rebulk.regex('[hx]-?264', '(MPEG-?4)?AVC(?:HD)?', value='H.264')
rebulk.regex('[hx]-?265', 'HEVC', value='H.265')
rebulk.regex('(?P<video_codec>hevc)(?P<color_depth>10)', value={'video_codec': 'H.265', 'color_depth': '10-bit'},
tags=['video-codec-suffix'], children=True)
# http://blog.mediacoderhq.com/h264-profiles-and-levels/
# http://fr.wikipedia.org/wiki/H.264
rebulk.defaults(name="video_profile", validator=seps_surround)
# https://en.wikipedia.org/wiki/H.264/MPEG-4_AVC
rebulk.defaults(name="video_profile",
validator=seps_surround,
disabled=lambda context: is_disabled(context, 'video_profile'))
rebulk.regex('10.?bit', 'Hi10P', value='10bit')
rebulk.regex('8.?bit', value='8bit')
rebulk.string('BP', value='Baseline', tags='video_profile.rule')
rebulk.string('XP', 'EP', value='Extended', tags='video_profile.rule')
rebulk.string('MP', value='Main', tags='video_profile.rule')
rebulk.string('HP', 'HiP', value='High', tags='video_profile.rule')
rebulk.string('BP', value='BP', tags='video_profile.rule')
rebulk.string('XP', 'EP', value='XP', tags='video_profile.rule')
rebulk.string('MP', value='MP', tags='video_profile.rule')
rebulk.string('HP', 'HiP', value='HP', tags='video_profile.rule')
rebulk.regex('Hi422P', value='Hi422P', tags='video_profile.rule')
rebulk.regex('Hi444PP', value='Hi444PP', tags='video_profile.rule')
# https://en.wikipedia.org/wiki/Scalable_Video_Coding
rebulk.string('SC', 'SVC', value='Scalable Video Coding', tags='video_profile.rule')
# https://en.wikipedia.org/wiki/AVCHD
rebulk.regex('AVC(?:HD)?', value='Advanced Video Codec High Definition', tags='video_profile.rule')
# https://en.wikipedia.org/wiki/H.265/HEVC
rebulk.string('HEVC', value='High Efficiency Video Coding', tags='video_profile.rule')
rebulk.string('DXVA', value='DXVA', name='video_api')
rebulk.regex('Hi422P', value='High 4:2:2')
rebulk.regex('Hi444PP', value='High 4:4:4 Predictive')
rebulk.regex('Hi10P?', value='High 10') # no profile validation is required
rebulk.string('DXVA', value='DXVA', name='video_api',
disabled=lambda context: is_disabled(context, 'video_api'))
rebulk.defaults(name='color_depth',
validator=seps_surround,
disabled=lambda context: is_disabled(context, 'color_depth'))
rebulk.regex('12.?bits?', value='12-bit')
rebulk.regex('10.?bits?', 'YUV420P10', 'Hi10P?', value='10-bit')
rebulk.regex('8.?bits?', value='8-bit')
rebulk.rules(ValidateVideoCodec, VideoProfileRule)
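# A minimal sketch of the new codec, profile and color depth values (expected
# results mirror the video codec test data further down in this diff):
#
#     >>> from guessit import guessit
#     >>> guessit('hevc10')['video_codec'], guessit('hevc10')['color_depth']
#     ('H.265', '10-bit')
#     >>> guessit('h265-HP')['video_profile']
#     'High'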
@ -51,19 +80,23 @@ def video_codec():
class ValidateVideoCodec(Rule):
"""
Validate video_codec with format property or separated
Validate that a video_codec match is separated, or adjacent to a match tagged 'video-codec-prefix'/'video-codec-suffix' (e.g. the source)
"""
priority = 64
consequence = RemoveMatch
def enabled(self, context):
return not is_disabled(context, 'video_codec')
def when(self, matches, context):
ret = []
for codec in matches.named('video_codec'):
if not seps_before(codec) and \
not matches.at_index(codec.start - 1, lambda match: match.name == 'format'):
not matches.at_index(codec.start - 1, lambda match: 'video-codec-prefix' in match.tags):
ret.append(codec)
continue
if not seps_after(codec):
if not seps_after(codec) and \
not matches.at_index(codec.end + 1, lambda match: 'video-codec-suffix' in match.tags):
ret.append(codec)
continue
return ret
@ -75,11 +108,16 @@ class VideoProfileRule(Rule):
"""
consequence = RemoveMatch
def enabled(self, context):
return not is_disabled(context, 'video_profile')
def when(self, matches, context):
profile_list = matches.named('video_profile', lambda match: 'video_profile.rule' in match.tags)
ret = []
for profile in profile_list:
codec = matches.previous(profile, lambda match: match.name == 'video_codec')
codec = matches.at_span(profile.span, lambda match: match.name == 'video_codec', 0)
if not codec:
codec = matches.previous(profile, lambda match: match.name == 'video_codec')
if not codec:
codec = matches.next(profile, lambda match: match.name == 'video_codec')
if not codec:

View file

@ -7,25 +7,37 @@ from pkg_resources import resource_stream # @UnresolvedImport
from rebulk.remodule import re
from rebulk import Rebulk, Rule, RemoveMatch
from ..common import seps
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import seps_surround
from ...reutils import build_or_pattern
def website():
def website(config):
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE)
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'website'))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
rebulk.defaults(name="website")
tlds = [l.strip().decode('utf-8')
for l in resource_stream('guessit', 'tlds-alpha-by-domain.txt').readlines()
if b'--' not in l][1:] # All registered domain extensions
with resource_stream('guessit', 'tlds-alpha-by-domain.txt') as tld_file:
tlds = [
tld.strip().decode('utf-8')
for tld in tld_file.readlines()
if b'--' not in tld
][1:] # All registered domain extensions
safe_tlds = ['com', 'org', 'net'] # Definitely a website extension
safe_subdomains = ['www'] # Definitely a website subdomain
safe_prefix = ['co', 'com', 'org', 'net'] # These words before a TLD make the website certain
safe_tlds = config['safe_tlds'] # Definitely a website extension
safe_subdomains = config['safe_subdomains'] # Definitely a website subdomain
safe_prefix = config['safe_prefixes'] # These words before a TLD make the website certain
website_prefixes = config['prefixes']
rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
r'\.)+(?:[a-z-]+\.)+(?:'+build_or_pattern(tlds) +
@ -41,6 +53,9 @@ def website():
r'))(?:[^a-z0-9]|$)',
safe_subdomains=safe_subdomains, safe_prefix=safe_prefix, tlds=tlds, children=True)
rebulk.string(*website_prefixes,
validator=seps_surround, private=True, tags=['website.prefix'])
class PreferTitleOverWebsite(Rule):
"""
If found match is more likely a title, remove website.
@ -57,11 +72,35 @@ def website():
def when(self, matches, context):
to_remove = []
for website_match in matches.named('website'):
suffix = matches.next(website_match, PreferTitleOverWebsite.valid_followers, 0)
if suffix:
to_remove.append(website_match)
safe = False
for safe_start in safe_subdomains + safe_prefix:
if website_match.value.lower().startswith(safe_start):
safe = True
break
if not safe:
suffix = matches.next(website_match, PreferTitleOverWebsite.valid_followers, 0)
if suffix:
to_remove.append(website_match)
return to_remove
rebulk.rules(PreferTitleOverWebsite)
rebulk.rules(PreferTitleOverWebsite, ValidateWebsitePrefix)
return rebulk
class ValidateWebsitePrefix(Rule):
"""
Validate website prefixes
"""
priority = 64
consequence = RemoveMatch
def when(self, matches, context):
to_remove = []
for prefix in matches.tagged('website.prefix'):
website_match = matches.next(prefix, predicate=lambda match: match.name == 'website', index=0)
if (not website_match or
matches.holes(prefix.end, website_match.start,
formatter=cleanup, seps=seps, predicate=lambda match: match.value)):
to_remove.append(prefix)
return to_remove
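# A hypothetical configuration sketch for this property: the first three keys
# mirror the defaults that were hard-coded before this change, while the
# 'prefixes' entry ('from') is only an assumed illustration of the new
# website.prefix support validated above:
#
#     config = {
#         'safe_tlds': ['com', 'org', 'net'],
#         'safe_subdomains': ['www'],
#         'safe_prefixes': ['co', 'com', 'org', 'net'],
#         'prefixes': ['from'],
#     }
#     rebulk = website(config)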

View file

@ -0,0 +1 @@
Not a configuration file

View file

@ -0,0 +1,4 @@
{
"expected_title": ["The 100", "OSS 117"],
"yaml": false
}

View file

@ -0,0 +1,4 @@
expected_title:
- The 100
- OSS 117
yaml: True

View file

@ -0,0 +1,4 @@
expected_title:
- The 100
- OSS 117
yaml: True

View file

@ -0,0 +1,335 @@
? vorbis
: options: --exclude audio_codec
-audio_codec: Vorbis
? DTS-ES
: options: --exclude audio_profile
audio_codec: DTS
-audio_profile: Extended Surround
? DTS.ES
: options: --include audio_codec
audio_codec: DTS
-audio_profile: Extended Surround
? 5.1
? 5ch
? 6ch
: options: --exclude audio_channels
-audio_channels: '5.1'
? Movie Title-x01-Other Title.mkv
? Movie Title-x01-Other Title
? directory/Movie Title-x01-Other Title/file.mkv
: options: --exclude bonus
-bonus: 1
-bonus_title: Other Title
? Title-x02-Bonus Title.mkv
: options: --include bonus
bonus: 2
-bonus_title: Other Title
? cd 1of3
: options: --exclude cd
-cd: 1
-cd_count: 3
? This.Is.Us
: options: --exclude country
title: This Is Us
-country: US
? 2015.01.31
: options: --exclude date
year: 2015
-date: 2015-01-31
? Something 2 mar 2013)
: options: --exclude date
-date: 2013-03-02
? 2012 2009 S01E02 2015 # If no year is marked, the second one is guessed.
: options: --exclude year
-year: 2009
? Director's cut
: options: --exclude edition
-edition: Director's Cut
? 2x5
? 2X5
? 02x05
? 2X05
? 02x5
? S02E05
? s02e05
? s02e5
? s2e05
? s02ep05
? s2EP5
: options: --exclude season
-season: 2
-episode: 5
? 2x6
? 2X6
? 02x06
? 2X06
? 02x6
? S02E06
? s02e06
? s02e6
? s2e06
? s02ep06
? s2EP6
: options: --exclude episode
-season: 2
-episode: 6
? serie Season 2 other
: options: --exclude season
-season: 2
? Some Dummy Directory/S02 Some Series/E01-Episode title.mkv
: options: --exclude episode_title
-episode_title: Episode title
season: 2
episode: 1
? Another Dummy Directory/S02 Some Series/E01-Episode title.mkv
: options: --include season --include episode
-episode_title: Episode title
season: 2
episode: 1
# pattern contains season and episode: it won't work when enabling only one
? Some Series S03E01E02
: options: --include episode
-season: 3
-episode: [1, 2]
# pattern contains season and episode: it won't work when enabling only one
? Another Series S04E01E02
: options: --include season
-season: 4
-episode: [1, 2]
? Show.Name.Season.4.Episode.1
: options: --include episode
-season: 4
episode: 1
? Another.Show.Name.Season.4.Episode.1
: options: --include season
season: 4
-episode: 1
? Some Series S01 02 03
: options: --exclude season
-season: [1, 2, 3]
? Some Series E01 02 04
: options: --exclude episode
-episode: [1, 2, 4]
? A very special episode s06 special
: options: -t episode --exclude episode_details
season: 6
-episode_details: Special
? S01D02.3-5-GROUP
: options: --exclude disc
-season: 1
-disc: [2, 3, 4, 5]
-episode: [2, 3, 4, 5]
? S01D02&4-6&8
: options: --exclude season
-season: 1
-disc: [2, 4, 5, 6, 8]
-episode: [2, 4, 5, 6, 8]
? Film Title-f01-Series Title.mkv
: options: --exclude film
-film: 1
-film_title: Film Title
? Another Film Title-f01-Series Title.mkv
: options: --exclude film_title
film: 1
-film_title: Film Title
? English
? .ENG.
: options: --exclude language
-language: English
? SubFrench
? SubFr
? STFr
: options: --exclude subtitle_language
-language: French
-subtitle_language: French
? ST.FR
: options: --exclude subtitle_language
language: French
-subtitle_language: French
? ENG.-.sub.FR
? ENG.-.FR Sub
: options: --include language
language: [English, French]
-subtitle_language: French
? ENG.-.SubFR
: options: --include language
language: English
-subtitle_language: French
? ENG.-.FRSUB
? ENG.-.FRSUBS
? ENG.-.FR-SUBS
: options: --include subtitle_language
-language: English
subtitle_language: French
? DVD.Real.XViD
? DVD.fix.XViD
: options: --exclude other
-other: Fix
-proper_count: 1
? Part 3
? Part III
? Part Three
? Part Trois
? Part3
: options: --exclude part
-part: 3
? Some.Title.XViD-by.Artik[SEDG].avi
: options: --exclude release_group
-release_group: Artik[SEDG]
? "[ABC] Some.Title.avi"
? some/folder/[ABC]Some.Title.avi
: options: --exclude release_group
-release_group: ABC
? 360p
? 360px
? "360"
? +500x360
: options: --exclude screen_size
-screen_size: 360p
? 640x360
: options: --exclude aspect_ratio
screen_size: 360p
-aspect_ratio: 1.778
? 8196x4320
: options: --exclude screen_size
-screen_size: 4320p
-aspect_ratio: 1.897
? 4.3gb
: options: --exclude size
-size: 4.3GB
? VhS_rip
? VHS.RIP
: options: --exclude source
-source: VHS
-other: Rip
? DVD.RIP
: options: --include other
-source: DVD
-other: Rip
? Title Only.avi
: options: --exclude title
-title: Title Only
? h265
? x265
? h.265
? x.265
? hevc
: options: --exclude video_codec
-video_codec: H.265
? hevc10
: options: --include color_depth
-video_codec: H.265
-color_depth: 10-bit
? HEVC-YUV420P10
: options: --include color_depth
-video_codec: H.265
color_depth: 10-bit
? h265-HP
: options: --exclude video_profile
video_codec: H.265
-video_profile: High
? House.of.Cards.2013.S02E03.1080p.NF.WEBRip.DD5.1.x264-NTb.mkv
? House.of.Cards.2013.S02E03.1080p.Netflix.WEBRip.DD5.1.x264-NTb.mkv
: options: --exclude streaming_service
-streaming_service: Netflix
? wawa.co.uk
: options: --exclude website
-website: wawa.co.uk
? movie.mkv
: options: --exclude mimetype
-mimetype: video/x-matroska
? another movie.mkv
: options: --exclude container
-container: mkv
? series s02e01
: options: --exclude type
-type: episode
? series s02e01
: options: --exclude type
-type: episode
? Hotel.Hell.S01E01.720p.DD5.1.448kbps-ALANiS
: options: --exclude audio_bit_rate
-audio_bit_rate: 448Kbps
? Katy Perry - Pepsi & Billboard Summer Beats Concert Series 2012 1080i HDTV 20 Mbps DD2.0 MPEG2-TrollHD.ts
: options: --exclude video_bit_rate
-video_bit_rate: 20Mbps
? "[Figmentos] Monster 34 - At the End of Darkness [781219F1].mkv"
: options: --exclude crc32
-crc32: 781219F1
? 1080p25
: options: --exclude frame_rate
screen_size: 1080p
-frame_rate: 25fps
? 1080p25
: options: --exclude screen_size
-screen_size: 1080p
-frame_rate: 25fps
? 1080p25
: options: --include frame_rate
-screen_size: 1080p
-frame_rate: 25fps
? 1080p 30fps
: options: --exclude screen_size
-screen_size: 1080p
frame_rate: 30fps

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@ -8,23 +8,29 @@
? +lame3.100
: audio_codec: MP3
? +MP2
: audio_codec: MP2
? +DolbyDigital
? +DD
? +Dolby Digital
: audio_codec: DolbyDigital
? +AC3
: audio_codec: Dolby Digital
? +DDP
? +DD+
? +EAC3
: audio_codec: Dolby Digital Plus
? +DolbyAtmos
? +Dolby Atmos
? +Atmos
? -Atmosphere
: audio_codec: DolbyAtmos
: audio_codec: Dolby Atmos
? +AAC
: audio_codec: AAC
? +AC3
: audio_codec: AC3
? +Flac
: audio_codec: FLAC
@ -33,29 +39,37 @@
? +True-HD
? +trueHD
: audio_codec: TrueHD
: audio_codec: Dolby TrueHD
? +True-HD51
? +trueHD51
: audio_codec: Dolby TrueHD
audio_channels: '5.1'
? +DTSHD
? +DTS HD
? +DTS-HD
: audio_codec: DTS
audio_profile: HD
: audio_codec: DTS-HD
? +DTS-HDma
: audio_codec: DTS
audio_profile: HDMA
? +DTSMA
: audio_codec: DTS-HD
audio_profile: Master Audio
? +AC3-hq
: audio_codec: AC3
audio_profile: HQ
: audio_codec: Dolby Digital
audio_profile: High Quality
? +AAC-HE
: audio_codec: AAC
audio_profile: HE
audio_profile: High Efficiency
? +AAC-LC
: audio_codec: AAC
audio_profile: LC
audio_profile: Low Complexity
? +AAC2.0
? +AAC20
: audio_codec: AAC
audio_channels: '2.0'
@ -79,5 +93,42 @@
: audio_channels: '1.0'
? DD5.1
: audio_codec: DolbyDigital
? DD51
: audio_codec: Dolby Digital
audio_channels: '5.1'
? -51
: audio_channels: '5.1'
? DTS-HD.HRA
? DTSHD.HRA
? DTS-HD.HR
? DTSHD.HR
? -HRA
? -HR
: audio_codec: DTS-HD
audio_profile: High Resolution Audio
? DTSES
? DTS-ES
? -ES
: audio_codec: DTS
audio_profile: Extended Surround
? DD-EX
? DDEX
? -EX
: audio_codec: Dolby Digital
audio_profile: EX
? OPUS
: audio_codec: Opus
? Vorbis
: audio_codec: Vorbis
? PCM
: audio_codec: PCM
? LPCM
: audio_codec: LPCM

View file

@ -7,4 +7,4 @@
? Some.Title-DVDRIP-x264-CDP
: cd: !!null
release_group: CDP
video_codec: h264
video_codec: H.264

View file

@ -8,3 +8,6 @@
? This.is.us.title
: title: This is us title
? This.Is.Us
: options: --no-default-config
title: This Is Us

View file

@ -2,24 +2,62 @@
# Use - marker to check inputs that should not match results.
? Director's cut
? Edition Director's cut
: edition: Director's cut
: edition: Director's Cut
? Collector
? Collector Edition
? Edition Collector
: edition: Collector Edition
: edition: Collector
? Special Edition
? Edition Special
? -Special
: edition: Special Edition
: edition: Special
? Criterion Edition
? Edition Criterion
? CC
? -Criterion
: edition: Criterion Edition
: edition: Criterion
? Deluxe
? Deluxe Edition
? Edition Deluxe
: edition: Deluxe Edition
: edition: Deluxe
? Super Movie Alternate XViD
? Super Movie Alternative XViD
? Super Movie Alternate Cut XViD
? Super Movie Alternative Cut XViD
: edition: Alternative Cut
? ddc
: edition: Director's Definitive Cut
? IMAX
? IMAX Edition
: edition: IMAX
? ultimate edition
? -ultimate
: edition: Ultimate
? ultimate collector edition
? ultimate collector's edition
? ultimate collectors edition
? -collectors edition
? -ultimate edition
: edition: [Ultimate, Collector]
? ultimate collectors edition dc
: edition: [Ultimate, Collector, Director's Cut]
? fan edit
? fan edition
? fan collection
: edition: Fan
? ultimate fan edit
? ultimate fan edition
? ultimate fan collection
: edition: [Ultimate, Fan]

View file

@ -32,8 +32,6 @@
? +serie Season 2 other
? +serie Saisons 2 other
? +serie Seasons 2 other
? +serie Serie 2 other
? +serie Series 2 other
? +serie Season Two other
? +serie Season II other
: season: 2
@ -116,10 +114,15 @@
? -A very special movie
: episode_details: Special
? A very special episode
? -A very special episode
: options: -t episode
episode_details: Special
? A very special episode s06 special
: options: -t episode
title: A very special episode
episode_details: Special
? 12 Monkeys\Season 01\Episode 05\12 Monkeys - S01E05 - The Night Room.mkv
: container: mkv
title: 12 Monkeys
@ -141,7 +144,7 @@
? Show.Name.-.Season.1.to.3.-.Mp4.1080p
? Show.Name.-.Season.1~3.-.Mp4.1080p
? Show.Name.-.Saison.1.a.3.-.Mp4.1080p
: container: MP4
: container: mp4
screen_size: 1080p
season:
- 1
@ -151,7 +154,7 @@
? Show.Name.Season.1.3&5.HDTV.XviD-GoodGroup[SomeTrash]
? Show.Name.Season.1.3 and 5.HDTV.XviD-GoodGroup[SomeTrash]
: format: HDTV
: source: HDTV
release_group: GoodGroup[SomeTrash]
season:
- 1
@ -159,12 +162,12 @@
- 5
title: Show Name
type: episode
video_codec: XviD
video_codec: Xvid
? Show.Name.Season.1.2.3-5.HDTV.XviD-GoodGroup[SomeTrash]
? Show.Name.Season.1.2.3~5.HDTV.XviD-GoodGroup[SomeTrash]
? Show.Name.Season.1.2.3 to 5.HDTV.XviD-GoodGroup[SomeTrash]
: format: HDTV
: source: HDTV
release_group: GoodGroup[SomeTrash]
season:
- 1
@ -174,18 +177,19 @@
- 5
title: Show Name
type: episode
video_codec: XviD
video_codec: Xvid
? The.Get.Down.S01EP01.FRENCH.720p.WEBRIP.XVID-STR
: episode: 1
format: WEBRip
source: Web
other: Rip
language: fr
release_group: STR
screen_size: 720p
season: 1
title: The Get Down
type: episode
video_codec: XviD
video_codec: Xvid
? My.Name.Is.Earl.S01E01-S01E21.SWE-SUB
: episode:
@ -244,4 +248,84 @@
? epi
: options: -t episode
title: epi
title: epi
? Episode20
? Episode 20
: episode: 20
? Episode50
? Episode 50
: episode: 50
? Episode51
? Episode 51
: episode: 51
? Episode70
? Episode 70
: episode: 70
? Episode71
? Episode 71
: episode: 71
? S01D02.3-5-GROUP
: disc: [2, 3, 4, 5]
? S01D02&4-6&8
: disc: [2, 4, 5, 6, 8]
? Something.4x05-06
? Something - 4x05-06
? Something:4x05-06
? Something 4x05-06
? Something-4x05-06
: title: Something
season: 4
episode:
- 5
- 6
? Something.4x05-06
? Something - 4x05-06
? Something:4x05-06
? Something 4x05-06
? Something-4x05-06
: options: -T something
title: something
season: 4
episode:
- 5
- 6
? Colony 23/S01E01.Some.title.mkv
: title: Colony 23
season: 1
episode: 1
episode_title: Some title
? Show.Name.E02.2010.mkv
: options: -t episode
title: Show Name
year: 2010
episode: 2
? Show.Name.E02.S2010.mkv
: options: -t episode
title: Show Name
year: 2010
season: 2010
episode: 2
? Show.Name.E02.2010.mkv
: title: Show Name
year: 2010
episode: 2
? Show.Name.E02.S2010.mkv
: title: Show Name
year: 2010
season: 2010
episode: 2

View file

@ -1,112 +0,0 @@
# Multiple input strings having same expected results can be chained.
# Use - marker to check inputs that should not match results.
? +VHS
? +VHSRip
? +VHS-Rip
? +VhS_rip
? +VHS.RIP
? -VHSAnythingElse
? -SomeVHS stuff
? -VH
? -VHx
? -VHxRip
: format: VHS
? +Cam
? +CamRip
? +CaM Rip
? +Cam_Rip
? +cam.rip
: format: Cam
? +Telesync
? +TS
? +HD TS
? -Hd.Ts # ts file extension
? -HD.TS # ts file extension
? +Hd-Ts
: format: Telesync
? +Workprint
? +workPrint
? +WorkPrint
? +WP
? -Work Print
: format: Workprint
? +Telecine
? +teleCine
? +TC
? -Tele Cine
: format: Telecine
? +PPV
? +ppv-rip
: format: PPV
? -TV
? +SDTV
? +SDTVRIP
? +Rip sd tv
? +TvRip
? +Rip TV
: format: TV
? +DVB
? +DVB-Rip
? +DvBRiP
? +pdTV
? +Pd Tv
: format: DVB
? +DVD
? +DVD-RIP
? +video ts
? +DVDR
? +DVD 9
? +dvd 5
? -dvd ts
: format: DVD
-format: ts
? +HDTV
? +tv rip hd
? +HDtv Rip
? +HdRip
: format: HDTV
? +VOD
? +VodRip
? +vod rip
: format: VOD
? +webrip
? +Web Rip
: format: WEBRip
? +webdl
? +Web DL
? +webHD
? +WEB hd
? +web
: format: WEB-DL
? +HDDVD
? +hd dvd
? +hdDvdRip
: format: HD-DVD
? +BluRay
? +BluRay rip
? +BD
? +BR
? +BDRip
? +BR rip
? +BD5
? +BD9
? +BD25
? +bd50
: format: BluRay
? XVID.NTSC.DVDR.nfo
: format: DVD

View file

@ -36,4 +36,12 @@
? +ENG.-.SubSV
? +ENG.-.SVSUB
: language: English
subtitle_language: Swedish
subtitle_language: Swedish
? The English Patient (1996)
: title: The English Patient
-language: english
? French.Kiss.1995.1080p
: title: French Kiss
-language: french

View file

@ -12,42 +12,35 @@
? +AudioFixed
? +Audio Fix
? +Audio Fixed
: other: AudioFix
: other: Audio Fixed
? +SyncFix
? +SyncFixed
? +Sync Fix
? +Sync Fixed
: other: SyncFix
: other: Sync Fixed
? +DualAudio
? +Dual Audio
: other: DualAudio
: other: Dual Audio
? +ws
? +WideScreen
? +Wide Screen
: other: WideScreen
: other: Widescreen
? +NF
? +Netflix
: other: Netflix
# Fix and Real must be surrounded by other properties to be matched.
? DVD.Real.XViD
# Fix must be surrounded by other properties to be matched.
? DVD.fix.XViD
? -DVD.Real
? -DVD.Fix
? -Real.XViD
? -Fix.XViD
: other: Proper
proper_count: 1
: other: Fix
-proper_count: 1
? -DVD.BlablaBla.Fix.Blablabla.XVID
? -DVD.BlablaBla.Fix.XVID
? -DVD.Fix.Blablabla.XVID
: other: Proper
proper_count: 1
: other: Fix
-proper_count: 1
? DVD.Real.PROPER.REPACK
@ -62,18 +55,20 @@
proper_count: 1
? XViD.Fansub
: other: Fansub
: other: Fan Subtitled
? XViD.Fastsub
: other: Fastsub
: other: Fast Subtitled
? +Season Complete
? -Complete
: other: Complete
? R5
: other: Region 5
? RC
: other: R5
: other: Region C
? PreAir
? Pre Air
@ -91,20 +86,26 @@
? HD
: other: HD
? mHD # ??
: other: mHD
? FHD
? FullHD
? Full HD
: other: Full HD
? UHD
? Ultra
? UltraHD
? Ultra HD
: other: Ultra HD
? mHD # ??
? HDLight
: other: HDLight
: other: Micro HD
? HQ
: other: HQ
? ddc
: other: DDC
: other: High Quality
? hr
: other: HR
: other: High Resolution
? PAL
: other: PAL
@ -115,14 +116,14 @@
? NTSC
: other: NTSC
? CC
: other: CC
? LDTV
: other: Low Definition
? LD
: other: LD
: other: Line Dubbed
? MD
: other: MD
: other: Mic Dubbed
? -The complete movie
: other: Complete
@ -131,7 +132,38 @@
: title: The complete movie
? +AC3-HQ
: audio_profile: HQ
: audio_profile: High Quality
? Other-HQ
: other: HQ
: other: High Quality
? reenc
? re-enc
? re-encoded
? reencoded
: other: Reencoded
? CONVERT XViD
: other: Converted
? +HDRIP # it's a Rip from a non-specified HD source
: other: [HD, Rip]
? SDR
: other: Standard Dynamic Range
? HDR
? HDR10
? -HDR100
: other: HDR10
? BT2020
? BT.2020
? -BT.20200
? -BT.2021
: other: BT.2020
? Upscaled
? Upscale
: other: Upscaled

View file

@ -0,0 +1,46 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name, pointless-string-statement
from rebulk.match import Matches, Match
from ...rules.processors import StripSeparators
def test_strip_separators():
strip_separators = StripSeparators()
matches = Matches()
m = Match(3, 11, input_string="pre.ABCDEF.post")
assert m.raw == '.ABCDEF.'
matches.append(m)
returned_matches = strip_separators.when(matches, None)
assert returned_matches == matches
strip_separators.then(matches, returned_matches, None)
assert m.raw == 'ABCDEF'
def test_strip_separators_keep_acronyms():
strip_separators = StripSeparators()
matches = Matches()
m = Match(0, 13, input_string=".S.H.I.E.L.D.")
m2 = Match(0, 22, input_string=".Agent.Of.S.H.I.E.L.D.")
assert m.raw == '.S.H.I.E.L.D.'
matches.append(m)
matches.append(m2)
returned_matches = strip_separators.when(matches, None)
assert returned_matches == matches
strip_separators.then(matches, returned_matches, None)
assert m.raw == '.S.H.I.E.L.D.'
assert m2.raw == 'Agent.Of.S.H.I.E.L.D.'

View file

@ -39,3 +39,33 @@
season: 1
title: Test
type: episode
? Show.Name.x264-byEMP
: title: Show Name
video_codec: H.264
release_group: byEMP
? Show.Name.x264-NovaRip
: title: Show Name
video_codec: H.264
release_group: NovaRip
? Show.Name.x264-PARTiCLE
: title: Show Name
video_codec: H.264
release_group: PARTiCLE
? Show.Name.x264-POURMOi
: title: Show Name
video_codec: H.264
release_group: POURMOi
? Show.Name.x264-RipPourBox
: title: Show Name
video_codec: H.264
release_group: RipPourBox
? Show.Name.x264-RiPRG
: title: Show Name
video_codec: H.264
release_group: RiPRG

View file

@ -2,68 +2,279 @@
# Use - marker to check inputs that should not match results.
? +360p
? +360px
? +360i
? "+360"
? -360
? +500x360
? -250x360
: screen_size: 360p
? +640x360
? -640x360i
? -684x360i
: screen_size: 360p
aspect_ratio: 1.778
? +360i
: screen_size: 360i
? +480x360i
? -480x360p
? -450x360
: screen_size: 360i
aspect_ratio: 1.333
? +368p
? +368px
? +368i
? "+368"
? -368i
? -368
? +500x368
: screen_size: 368p
? -490x368
? -700x368
: screen_size: 368p
? +492x368p
: screen_size: 492x368
aspect_ratio: 1.337
? +654x368
: screen_size: 368p
aspect_ratio: 1.777
? +698x368
: screen_size: 368p
aspect_ratio: 1.897
? +368i
: -screen_size: 368i
? +480p
? +480px
? +480i
? "+480"
? +500x480
? -480i
? -480
? -500x480
? -638x480
? -920x480
: screen_size: 480p
? +640x480
: screen_size: 480p
aspect_ratio: 1.333
? +852x480
: screen_size: 480p
aspect_ratio: 1.775
? +910x480
: screen_size: 480p
aspect_ratio: 1.896
? +500x480
? +500 x 480
? +500 * 480
? +500x480p
? +500X480i
: screen_size: 500x480
aspect_ratio: 1.042
? +480i
? +852x480i
: screen_size: 480i
? +576p
? +576px
? +576i
? "+576"
? +500x576
? -576i
? -576
? -500x576
? -766x576
? -1094x576
: screen_size: 576p
? +768x576
: screen_size: 576p
aspect_ratio: 1.333
? +1024x576
: screen_size: 576p
aspect_ratio: 1.778
? +1092x576
: screen_size: 576p
aspect_ratio: 1.896
? +500x576
: screen_size: 500x576
aspect_ratio: 0.868
? +576i
: screen_size: 576i
? +720p
? +720px
? -720i
? 720hd
? 720pHD
? +720i
? "+720"
? +500x720
? -720
? -500x720
? -950x720
? -1368x720
: screen_size: 720p
? +960x720
: screen_size: 720p
aspect_ratio: 1.333
? +1280x720
: screen_size: 720p
aspect_ratio: 1.778
? +1366x720
: screen_size: 720p
aspect_ratio: 1.897
? +500x720
: screen_size: 500x720
aspect_ratio: 0.694
? +900p
? +900px
? +900i
? "+900"
? +500x900
? -900i
? -900
? -500x900
? -1198x900
? -1710x900
: screen_size: 900p
? +1200x900
: screen_size: 900p
aspect_ratio: 1.333
? +1600x900
: screen_size: 900p
aspect_ratio: 1.778
? +1708x900
: screen_size: 900p
aspect_ratio: 1.898
? +500x900
? +500x900p
? +500x900i
: screen_size: 500x900
aspect_ratio: 0.556
? +900i
: screen_size: 900i
? +1080p
? +1080px
? +1080hd
? +1080pHD
? -1080i
? "+1080"
? +500x1080
? -1080
? -500x1080
? -1438x1080
? -2050x1080
: screen_size: 1080p
? +1440x1080
: screen_size: 1080p
aspect_ratio: 1.333
? +1920x1080
: screen_size: 1080p
aspect_ratio: 1.778
? +2048x1080
: screen_size: 1080p
aspect_ratio: 1.896
? +1080i
? -1080p
: screen_size: 1080i
? 1440p
: screen_size: 1440p
? +500x1080
: screen_size: 500x1080
aspect_ratio: 0.463
? +2160p
? +2160px
? +2160i
? "+2160"
? -2160i
? -2160
? +4096x2160
: screen_size: 4K
? +4k
? -2878x2160
? -4100x2160
: screen_size: 2160p
? +2880x2160
: screen_size: 2160p
aspect_ratio: 1.333
? +3840x2160
: screen_size: 2160p
aspect_ratio: 1.778
? +4098x2160
: screen_size: 2160p
aspect_ratio: 1.897
? +500x2160
: screen_size: 500x2160
aspect_ratio: 0.231
? +4320p
? +4320px
? -4320i
? -4320
? -5758x2160
? -8198x2160
: screen_size: 4320p
? +5760x4320
: screen_size: 4320p
aspect_ratio: 1.333
? +7680x4320
: screen_size: 4320p
aspect_ratio: 1.778
? +8196x4320
: screen_size: 4320p
aspect_ratio: 1.897
? +500x4320
: screen_size: 500x4320
aspect_ratio: 0.116
? Test.File.720hd.bluray
? Test.File.720p24
? Test.File.720p30
? Test.File.720p50
? Test.File.720p60
? Test.File.720p120
: screen_size: 720p
? Test.File.400p
: options:
advanced_config:
screen_size:
progressive: ["400"]
screen_size: 400p
? Test.File2.400p
: options:
advanced_config:
screen_size:
progressive: ["400"]
screen_size: 400p
? Test.File.720p
: options:
advanced_config:
screen_size:
progressive: ["400"]
screen_size: 720p

View file

@ -0,0 +1,8 @@
? 1.1tb
: size: 1.1TB
? 123mb
: size: 123MB
? 4.3gb
: size: 4.3GB

View file

@ -0,0 +1,323 @@
# Multiple input strings having same expected results can be chained.
# Use - marker to check inputs that should not match results.
? +VHS
? -VHSAnythingElse
? -SomeVHS stuff
? -VH
? -VHx
: source: VHS
-other: Rip
? +VHSRip
? +VHS-Rip
? +VhS_rip
? +VHS.RIP
? -VHS
? -VHxRip
: source: VHS
other: Rip
? +Cam
: source: Camera
-other: Rip
? +CamRip
? +CaM Rip
? +Cam_Rip
? +cam.rip
? -Cam
: source: Camera
other: Rip
? +HDCam
? +HD-Cam
: source: HD Camera
-other: Rip
? +HDCamRip
? +HD-Cam.rip
? -HDCam
? -HD-Cam
: source: HD Camera
other: Rip
? +Telesync
? +TS
: source: Telesync
-other: Rip
? +TelesyncRip
? +TSRip
? -Telesync
? -TS
: source: Telesync
other: Rip
? +HD TS
? -Hd.Ts # ts file extension
? -HD.TS # ts file extension
? +Hd-Ts
: source: HD Telesync
-other: Rip
? +HD TS Rip
? +Hd-Ts-Rip
? -HD TS
? -Hd-Ts
: source: HD Telesync
other: Rip
? +Workprint
? +workPrint
? +WorkPrint
? +WP
? -Work Print
: source: Workprint
-other: Rip
? +Telecine
? +teleCine
? +TC
? -Tele Cine
: source: Telecine
-other: Rip
? +Telecine Rip
? +teleCine-Rip
? +TC-Rip
? -Telecine
? -TC
: source: Telecine
other: Rip
? +HD-TELECINE
? +HDTC
: source: HD Telecine
-other: Rip
? +HD-TCRip
? +HD TELECINE RIP
? -HD-TELECINE
? -HDTC
: source: HD Telecine
other: Rip
? +PPV
: source: Pay-per-view
-other: Rip
? +ppv-rip
? -PPV
: source: Pay-per-view
other: Rip
? -TV
? +SDTV
? +TV-Dub
: source: TV
-other: Rip
? +SDTVRIP
? +Rip sd tv
? +TvRip
? +Rip TV
? -TV
? -SDTV
: source: TV
other: Rip
? +DVB
? +pdTV
? +Pd Tv
: source: Digital TV
-other: Rip
? +DVB-Rip
? +DvBRiP
? +pdtvRiP
? +pd tv RiP
? -DVB
? -pdTV
? -Pd Tv
: source: Digital TV
other: Rip
? +DVD
? +video ts
? +DVDR
? +DVD 9
? +dvd 5
? -dvd ts
: source: DVD
-source: Telesync
-other: Rip
? +DVD-RIP
? -video ts
? -DVD
? -DVDR
? -DVD 9
? -dvd 5
: source: DVD
other: Rip
? +HDTV
: source: HDTV
-other: Rip
? +tv rip hd
? +HDtv Rip
? -HdRip # it's a Rip from a non-specified HD source
? -HDTV
: source: HDTV
other: Rip
? +VOD
: source: Video on Demand
-other: Rip
? +VodRip
? +vod rip
? -VOD
: source: Video on Demand
other: Rip
? +webrip
? +Web Rip
? +webdlrip
? +web dl rip
? +webcap
? +web cap
? +webcaprip
? +web cap rip
: source: Web
other: Rip
? +webdl
? +Web DL
? +webHD
? +WEB hd
? +web
: source: Web
-other: Rip
? +HDDVD
? +hd dvd
: source: HD-DVD
-other: Rip
? +hdDvdRip
? -HDDVD
? -hd dvd
: source: HD-DVD
other: Rip
? +BluRay
? +BD
? +BD5
? +BD9
? +BD25
? +bd50
: source: Blu-ray
-other: Rip
? +BR-Scr
? +BR.Screener
: source: Blu-ray
other: [Reencoded, Screener]
-language: pt-BR
? +BR-Rip
? +BRRip
: source: Blu-ray
other: [Reencoded, Rip]
-language: pt-BR
? +BluRay rip
? +BDRip
? -BluRay
? -BD
? -BR
? -BR rip
? -BD5
? -BD9
? -BD25
? -bd50
: source: Blu-ray
other: Rip
? XVID.NTSC.DVDR.nfo
: source: DVD
-other: Rip
? +AHDTV
: source: Analog HDTV
-other: Rip
? +dsr
? +dth
: source: Satellite
-other: Rip
? +dsrip
? +ds rip
? +dsrrip
? +dsr rip
? +satrip
? +sat rip
? +dthrip
? +dth rip
? -dsr
? -dth
: source: Satellite
other: Rip
? +UHDTV
: source: Ultra HDTV
-other: Rip
? +UHDRip
? +UHDTV Rip
? -UHDTV
: source: Ultra HDTV
other: Rip
? UHD Bluray
? UHD 2160p Bluray
? UHD 8bit Bluray
? UHD HQ 8bit Bluray
? Ultra Bluray
? Ultra HD Bluray
? Bluray ULTRA
? Bluray Ultra HD
? Bluray UHD
? 4K Bluray
? 2160p Bluray
? UHD 10bit HDR Bluray
? UHD HDR10 Bluray
? -HD Bluray
? -AMERICAN ULTRA (2015) 1080p Bluray
? -American.Ultra.2015.BRRip
? -BRRip XviD AC3-ULTRAS
? -UHD Proper Bluray
: source: Ultra HD Blu-ray
? UHD.BRRip
? UHD.2160p.BRRip
? BRRip.2160p.UHD
? BRRip.[4K-2160p-UHD]
: source: Ultra HD Blu-ray
other: [Reencoded, Rip]
? UHD.2160p.BDRip
? BDRip.[4K-2160p-UHD]
: source: Ultra HD Blu-ray
other: Rip
? DM
: source: Digital Master
? DMRIP
? DM-RIP
: source: Digital Master
other: Rip

View file

@ -30,3 +30,14 @@
? Some.Other title/Some other title.mkv
: title: Some Other title
? This T.I.T.L.E. has dots
? This.T.I.T.L.E..has.dots
: title: This T.I.T.L.E has dots
? This.T.I.T.L.E..has.dots.S01E02.This E.P.T.I.T.L.E.has.dots
: title: This T.I.T.L.E has dots
season: 1
episode: 2
episode_title: This E.P.T.I.T.L.E has dots
type: episode

View file

@ -6,15 +6,19 @@
? Rv30
? rv40
? -xrv40
: video_codec: Real
: video_codec: RealVideo
? mpeg2
? MPEG2
? MPEG-2
? mpg2
? H262
? H.262
? x262
? -mpeg
? -mpeg 2 # Not sure if we should ignore this one ...
? -xmpeg2
? -mpeg2x
: video_codec: Mpeg2
: video_codec: MPEG-2
? DivX
? -div X
@ -26,19 +30,25 @@
? XviD
? xvid
? -x vid
: video_codec: XviD
: video_codec: Xvid
? h263
? x263
? h.263
: video_codec: H.263
? h264
? x264
? h.264
? x.264
? mpeg4-AVC
? AVC
? AVCHD
? -MPEG-4
? -mpeg4
? -mpeg
? -h 265
? -x265
: video_codec: h264
: video_codec: H.264
? h265
? x265
@ -47,8 +57,42 @@
? hevc
? -h 264
? -x264
: video_codec: h265
: video_codec: H.265
? hevc10
? HEVC-YUV420P10
: video_codec: H.265
color_depth: 10-bit
? h265-HP
: video_codec: h265
video_profile: HP
: video_codec: H.265
video_profile: High
? H.264-SC
: video_codec: H.264
video_profile: Scalable Video Coding
? mpeg4-AVC
: video_codec: H.264
video_profile: Advanced Video Codec High Definition
? AVCHD-SC
? H.264-AVCHD-SC
: video_codec: H.264
video_profile:
- Scalable Video Coding
- Advanced Video Codec High Definition
? VC1
? VC-1
: video_codec: VC-1
? VP7
: video_codec: VP7
? VP8
? VP80
: video_codec: VP8
? VP9
: video_codec: VP9

File diff suppressed because it is too large

View file

@ -27,6 +27,14 @@ def test_forced_binary():
assert ret and 'title' in ret and isinstance(ret['title'], six.binary_type)
@pytest.mark.skipif('sys.version_info < (3, 4)', reason="Path is not available")
def test_pathlike_object():
from pathlib import Path
path = Path('Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv')
ret = guessit(path)
assert ret and 'title' in ret
def test_unicode_japanese():
ret = guessit('[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi')
assert ret and 'title' in ret

View file

@ -53,6 +53,14 @@ if six.PY2:
"""
def test_ensure_standard_string_class():
class CustomStr(str):
pass
ret = guessit(CustomStr('1080p'), options={'advanced': True})
assert ret and 'screen_size' in ret and not isinstance(ret['screen_size'].input_string, CustomStr)
def test_properties():
props = properties()
assert 'video_codec' in props.keys()

View file

@ -0,0 +1,175 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name, pointless-string-statement
import os
import pytest
from ..options import get_options_file_locations, merge_options, load_config_file, ConfigurationException, \
load_config
__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
def test_config_locations():
homedir = '/root'
cwd = '/root/cwd'
locations = get_options_file_locations(homedir, cwd, True)
assert len(locations) == 9
assert '/root/.guessit/options.json' in locations
assert '/root/.guessit/options.yml' in locations
assert '/root/.guessit/options.yaml' in locations
assert '/root/.config/guessit/options.json' in locations
assert '/root/.config/guessit/options.yml' in locations
assert '/root/.config/guessit/options.yaml' in locations
assert '/root/cwd/guessit.options.json' in locations
assert '/root/cwd/guessit.options.yml' in locations
assert '/root/cwd/guessit.options.yaml' in locations
def test_merge_configurations():
c1 = {'param1': True, 'param2': True, 'param3': False}
c2 = {'param1': False, 'param2': True, 'param3': False}
c3 = {'param1': False, 'param2': True, 'param3': False}
merged = merge_options(c1, c2, c3)
assert not merged['param1']
assert merged['param2']
assert not merged['param3']
merged = merge_options(c3, c2, c1)
assert merged['param1']
assert merged['param2']
assert not merged['param3']
def test_merge_configurations_lists():
c1 = {'param1': [1], 'param2': True, 'param3': False}
c2 = {'param1': [2], 'param2': True, 'param3': False}
c3 = {'param1': [3], 'param2': True, 'param3': False}
merged = merge_options(c1, c2, c3)
assert merged['param1'] == [1, 2, 3]
assert merged['param2']
assert not merged['param3']
merged = merge_options(c3, c2, c1)
assert merged['param1'] == [3, 2, 1]
assert merged['param2']
assert not merged['param3']
def test_merge_configurations_deep():
c1 = {'param1': [1], 'param2': {'d1': [1]}, 'param3': False}
c2 = {'param1': [2], 'param2': {'d1': [2]}, 'param3': False}
c3 = {'param1': [3], 'param2': {'d3': [3]}, 'param3': False}
merged = merge_options(c1, c2, c3)
assert merged['param1'] == [1, 2, 3]
assert merged['param2']['d1'] == [1, 2]
assert merged['param2']['d3'] == [3]
assert 'd2' not in merged['param2']
assert not merged['param3']
merged = merge_options(c3, c2, c1)
assert merged['param1'] == [3, 2, 1]
assert merged['param2']
assert merged['param2']['d1'] == [2, 1]
assert 'd2' not in merged['param2']
assert merged['param2']['d3'] == [3]
assert not merged['param3']
def test_merge_configurations_pristine_all():
c1 = {'param1': [1], 'param2': True, 'param3': False}
c2 = {'param1': [2], 'param2': True, 'param3': False, 'pristine': True}
c3 = {'param1': [3], 'param2': True, 'param3': False}
merged = merge_options(c1, c2, c3)
assert merged['param1'] == [2, 3]
assert merged['param2']
assert not merged['param3']
merged = merge_options(c3, c2, c1)
assert merged['param1'] == [2, 1]
assert merged['param2']
assert not merged['param3']
def test_merge_configurations_pristine_properties():
c1 = {'param1': [1], 'param2': False, 'param3': True}
c2 = {'param1': [2], 'param2': True, 'param3': False, 'pristine': ['param2', 'param3']}
c3 = {'param1': [3], 'param2': True, 'param3': False}
merged = merge_options(c1, c2, c3)
assert merged['param1'] == [1, 2, 3]
assert merged['param2']
assert not merged['param3']
def test_merge_configurations_pristine_properties_deep():
c1 = {'param1': [1], 'param2': {'d1': False}, 'param3': True}
c2 = {'param1': [2], 'param2': {'d1': True}, 'param3': False, 'pristine': ['param2', 'param3']}
c3 = {'param1': [3], 'param2': {'d1': True}, 'param3': False}
merged = merge_options(c1, c2, c3)
assert merged['param1'] == [1, 2, 3]
assert merged['param2']
assert not merged['param3']
def test_merge_configurations_pristine_properties2():
c1 = {'param1': [1], 'param2': False, 'param3': True}
c2 = {'param1': [2], 'param2': True, 'param3': False, 'pristine': ['param1', 'param2', 'param3']}
c3 = {'param1': [3], 'param2': True, 'param3': False}
merged = merge_options(c1, c2, c3)
assert merged['param1'] == [2, 3]
assert merged['param2']
assert not merged['param3']
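Taken together, these cases pin down the merge semantics: list values from later dictionaries are appended, scalars are overridden, and a 'pristine' entry first discards earlier contributions, either for every key (pristine: True) or only for the listed keys. A minimal sketch of that behaviour; the option names are illustrative:

    from guessit.options import merge_options

    base = {'expected_title': ['Foo'], 'advanced': True}
    override = {'expected_title': ['Bar'], 'pristine': ['expected_title']}
    merged = merge_options(base, override)
    # Without 'pristine' the lists would merge to ['Foo', 'Bar'];
    # with it, only the overriding value survives.
    assert merged['expected_title'] == ['Bar']
    assert merged['advanced']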
def test_load_config_file():
json_config = load_config_file(os.path.join(__location__, 'config', 'test.json'))
yml_config = load_config_file(os.path.join(__location__, 'config', 'test.yml'))
yaml_config = load_config_file(os.path.join(__location__, 'config', 'test.yaml'))
assert json_config['expected_title'] == ['The 100', 'OSS 117']
assert yml_config['expected_title'] == ['The 100', 'OSS 117']
assert yaml_config['expected_title'] == ['The 100', 'OSS 117']
assert json_config['yaml'] is False
assert yml_config['yaml'] is True
assert yaml_config['yaml'] is True
with pytest.raises(ConfigurationException) as excinfo:
load_config_file(os.path.join(__location__, 'config', 'dummy.txt'))
assert excinfo.match('Configuration file extension is not supported for ".*?dummy.txt" file\\.')
def test_load_config():
config = load_config({'no_default_config': True, 'param1': 'test',
'config': [os.path.join(__location__, 'config', 'test.yml')]})
assert not config.get('param1')
assert config.get('advanced_config') # advanced_config is still loaded from default
assert config['expected_title'] == ['The 100', 'OSS 117']
assert config['yaml'] is True
config = load_config({'no_default_config': True, 'param1': 'test'})
assert not config.get('param1')
assert 'expected_title' not in config
assert 'yaml' not in config
config = load_config({'no_default_config': True, 'param1': 'test', 'config': ['false']})
assert not config.get('param1')
assert 'expected_title' not in config
assert 'yaml' not in config
View file
@ -2,24 +2,20 @@
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name
import logging
import os
# io.open supports encoding= in python 2.7
from io import open # pylint: disable=redefined-builtin
import os
import yaml
import six
import babelfish
import pytest
import six
import yaml
from rebulk.remodule import re
from rebulk.utils import is_iterable
from guessit.options import parse_options
from ..yamlutils import OrderedDictYAMLLoader
from .. import guessit
from ..options import parse_options
from ..yamlutils import OrderedDictYAMLLoader
logger = logging.getLogger(__name__)
@ -64,17 +60,17 @@ class EntryResult(object):
def __repr__(self):
if self.ok:
return self.string + ': OK!'
elif self.warning:
if self.warning:
return '%s%s: WARNING! (valid=%i, extra=%i)' % ('-' if self.negates else '', self.string, len(self.valid),
len(self.extra))
elif self.error:
if self.error:
return '%s%s: ERROR! (valid=%i, missing=%i, different=%i, extra=%i, others=%i)' % \
('-' if self.negates else '', self.string, len(self.valid), len(self.missing), len(self.different),
len(self.extra), len(self.others))
else:
return '%s%s: UNKNOWN! (valid=%i, missing=%i, different=%i, extra=%i, others=%i)' % \
('-' if self.negates else '', self.string, len(self.valid), len(self.missing), len(self.different),
len(self.extra), len(self.others))
return '%s%s: UNKNOWN! (valid=%i, missing=%i, different=%i, extra=%i, others=%i)' % \
('-' if self.negates else '', self.string, len(self.valid), len(self.missing), len(self.different),
len(self.extra), len(self.others))
@property
def details(self):
@ -113,6 +109,8 @@ def files_and_ids(predicate=None):
ids = []
for (dirpath, _, filenames) in os.walk(__location__):
if os.path.split(dirpath)[-1] == 'config':
continue
if dirpath == __location__:
dirpath_rel = ''
else:
@ -134,7 +132,7 @@ class TestYml(object):
Use $ marker to check inputs that should not match results.
"""
options_re = re.compile(r'^([ \+-]+)(.*)')
options_re = re.compile(r'^([ +-]+)(.*)')
files, ids = files_and_ids(filename_predicate)
@ -147,7 +145,7 @@ class TestYml(object):
@pytest.mark.parametrize('filename', files, ids=ids)
def test(self, filename, caplog):
caplog.setLevel(logging.INFO)
caplog.set_level(logging.INFO)
with open(os.path.join(__location__, filename), 'r', encoding='utf-8') as infile:
data = yaml.load(infile, OrderedDictYAMLLoader)
entries = Results()
@ -173,8 +171,9 @@ class TestYml(object):
entries.assert_ok()
def check_data(self, filename, string, expected):
if six.PY2 and isinstance(string, six.text_type):
string = string.encode('utf-8')
if six.PY2:
if isinstance(string, six.text_type):
string = string.encode('utf-8')
converts = []
for k, v in expected.items():
if isinstance(v, six.text_type):
@ -187,13 +186,13 @@ class TestYml(object):
if not string_predicate or string_predicate(string): # pylint: disable=not-callable
entry = self.check(string, expected)
if entry.ok:
logger.debug('[' + filename + '] ' + str(entry))
logger.debug('[%s] %s', filename, entry)
elif entry.warning:
logger.warning('[' + filename + '] ' + str(entry))
logger.warning('[%s] %s', filename, entry)
elif entry.error:
logger.error('[' + filename + '] ' + str(entry))
logger.error('[%s] %s', filename, entry)
for line in entry.details:
logger.error('[' + filename + '] ' + ' ' * 4 + line)
logger.error('[%s] %s', filename, ' ' * 4 + line)
return entry
def check(self, string, expected):
@ -204,12 +203,10 @@ class TestYml(object):
options = {}
if not isinstance(options, dict):
options = parse_options(options)
if 'implicit' not in options:
options['implicit'] = True
try:
result = guessit(string, options)
except Exception as exc:
logger.error('[' + string + '] Exception: ' + str(exc))
logger.error('[%s] Exception: %s', string, exc)
raise exc
entry = EntryResult(string, negates)
@ -255,10 +252,10 @@ class TestYml(object):
return False
if isinstance(next(iter(values)), babelfish.Language):
# pylint: disable=no-member
expecteds = set([babelfish.Language.fromguessit(expected) for expected in expecteds])
expecteds = {babelfish.Language.fromguessit(expected) for expected in expecteds}
elif isinstance(next(iter(values)), babelfish.Country):
# pylint: disable=no-member
expecteds = set([babelfish.Country.fromguessit(expected) for expected in expecteds])
expecteds = {babelfish.Country.fromguessit(expected) for expected in expecteds}
return values == expecteds
def check_expected(self, result, expected, entry):
@ -271,10 +268,10 @@ class TestYml(object):
if negates_key:
entry.valid.append((expected_key, expected_value))
else:
entry.different.append((expected_key, expected_value, result[expected_key]))
entry.different.append((expected_key, expected_value, result[result_key]))
else:
if negates_key:
entry.different.append((expected_key, expected_value, result[expected_key]))
entry.different.append((expected_key, expected_value, result[result_key]))
else:
entry.valid.append((expected_key, expected_value))
elif not negates_key:
View file
@ -3,9 +3,9 @@
title: Fear and Loathing in Las Vegas
year: 1998
screen_size: 720p
format: HD-DVD
source: HD-DVD
audio_codec: DTS
video_codec: h264
video_codec: H.264
release_group: ESiR
? Series/Duckman/Duckman - 101 (01) - 20021107 - I, Duckman.avi
@ -36,8 +36,9 @@
episode_format: Minisode
episode: 1
episode_title: Good Cop Bad Cop
format: WEBRip
video_codec: XviD
source: Web
other: Rip
video_codec: Xvid
? Series/Kaamelott/Kaamelott - Livre V - Ep 23 - Le Forfait.avi
: type: episode
@ -50,10 +51,10 @@
title: The Doors
year: 1991
date: 2008-03-09
format: BluRay
source: Blu-ray
screen_size: 720p
audio_codec: AC3
video_codec: h264
audio_codec: Dolby Digital
video_codec: H.264
release_group: HiS@SiLUHD
language: english
website: sharethefiles.com
@ -63,14 +64,15 @@
title: MASH
year: 1970
video_codec: DivX
format: DVD
source: DVD
other: [Dual Audio, Rip]
? the.mentalist.501.hdtv-lol.mp4
: type: episode
title: the mentalist
season: 5
episode: 1
format: HDTV
source: HDTV
release_group: lol
? the.simpsons.2401.hdtv-lol.mp4
@ -78,7 +80,7 @@
title: the simpsons
season: 24
episode: 1
format: HDTV
source: HDTV
release_group: lol
? Homeland.S02E01.HDTV.x264-EVOLVE.mp4
@ -86,8 +88,8 @@
title: Homeland
season: 2
episode: 1
format: HDTV
video_codec: h264
source: HDTV
video_codec: H.264
release_group: EVOLVE
? /media/Band_of_Brothers-e01-Currahee.mkv
@ -115,7 +117,7 @@
title: new girl
season: 1
episode: 17
format: HDTV
source: HDTV
release_group: lol
? The.Office.(US).1x03.Health.Care.HDTV.XviD-LOL.avi
@ -125,8 +127,8 @@
season: 1
episode: 3
episode_title: Health Care
format: HDTV
video_codec: XviD
source: HDTV
video_codec: Xvid
release_group: LOL
? The_Insider-(1999)-x02-60_Minutes_Interview-1996.mp4
@ -154,18 +156,18 @@
season: 56
episode: 6
screen_size: 720p
format: HDTV
video_codec: h264
source: HDTV
video_codec: H.264
? White.House.Down.2013.1080p.BluRay.DTS-HD.MA.5.1.x264-PublicHD.mkv
: type: movie
title: White House Down
year: 2013
screen_size: 1080p
format: BluRay
audio_codec: DTS
audio_profile: HDMA
video_codec: h264
source: Blu-ray
audio_codec: DTS-HD
audio_profile: Master Audio
video_codec: H.264
release_group: PublicHD
audio_channels: "5.1"
@ -174,10 +176,10 @@
title: White House Down
year: 2013
screen_size: 1080p
format: BluRay
audio_codec: DTS
audio_profile: HDMA
video_codec: h264
source: Blu-ray
audio_codec: DTS-HD
audio_profile: Master Audio
video_codec: H.264
release_group: PublicHD
audio_channels: "5.1"
@ -188,10 +190,10 @@
season: 1
episode: 1
screen_size: 720p
format: WEB-DL
source: Web
audio_channels: "5.1"
video_codec: h264
audio_codec: DolbyDigital
video_codec: H.264
audio_codec: Dolby Digital
release_group: NTb
? Despicable.Me.2.2013.1080p.BluRay.x264-VeDeTT.nfo
@ -199,37 +201,39 @@
title: Despicable Me 2
year: 2013
screen_size: 1080p
format: BluRay
video_codec: h264
source: Blu-ray
video_codec: H.264
release_group: VeDeTT
? Le Cinquieme Commando 1971 SUBFORCED FRENCH DVDRiP XViD AC3 Bandix.mkv
: type: movie
audio_codec: AC3
format: DVD
audio_codec: Dolby Digital
source: DVD
other: Rip
release_group: Bandix
subtitle_language: French
title: Le Cinquieme Commando
video_codec: XviD
video_codec: Xvid
year: 1971
? Le Seigneur des Anneaux - La Communauté de l'Anneau - Version Longue - BDRip.mkv
: type: movie
format: BluRay
title: Le Seigneur des Anneaux
source: Blu-ray
other: Rip
? La petite bande (Michel Deville - 1983) VF PAL MP4 x264 AAC.mkv
: type: movie
audio_codec: AAC
language: French
title: La petite bande
video_codec: h264
video_codec: H.264
year: 1983
other: PAL
? Retour de Flammes (Gregor Schnitzler 2003) FULL DVD.iso
: type: movie
format: DVD
source: DVD
title: Retour de Flammes
type: movie
year: 2003
@ -250,16 +254,16 @@
: type: movie
year: 2014
title: A Common Title
edition: Special Edition
edition: Special
? Downton.Abbey.2013.Christmas.Special.HDTV.x264-FoV.mp4
: type: episode
year: 2013
title: Downton Abbey
episode_title: Christmas Special
video_codec: h264
video_codec: H.264
release_group: FoV
format: HDTV
source: HDTV
episode_details: Special
? Doctor_Who_2013_Christmas_Special.The_Time_of_The_Doctor.HD
@ -280,10 +284,10 @@
? Robot Chicken S06-Born Again Virgin Christmas Special HDTV x264.avi
: type: episode
title: Robot Chicken
format: HDTV
source: HDTV
season: 6
episode_title: Born Again Virgin Christmas Special
video_codec: h264
video_codec: H.264
episode_details: Special
? Wicked.Tuna.S03E00.Head.To.Tail.Special.HDTV.x264-YesTV
@ -293,14 +297,14 @@
release_group: YesTV
season: 3
episode: 0
video_codec: h264
format: HDTV
video_codec: H.264
source: HDTV
episode_details: Special
? The.Voice.UK.S03E12.HDTV.x264-C4TV
: episode: 12
video_codec: h264
format: HDTV
video_codec: H.264
source: HDTV
title: The Voice
release_group: C4TV
season: 3
@ -317,21 +321,21 @@
? FlexGet.S01E02.TheName.HDTV.xvid
: episode: 2
format: HDTV
source: HDTV
season: 1
title: FlexGet
episode_title: TheName
type: episode
video_codec: XviD
video_codec: Xvid
? FlexGet.S01E02.TheName.HDTV.xvid
: episode: 2
format: HDTV
source: HDTV
season: 1
title: FlexGet
episode_title: TheName
type: episode
video_codec: XviD
video_codec: Xvid
? some.series.S03E14.Title.Here.720p
: episode: 14
@ -362,7 +366,7 @@
? Something.Season.2.1of4.Ep.Title.HDTV.torrent
: episode_count: 4
episode: 1
format: HDTV
source: HDTV
season: 2
title: Something
episode_title: Title
@ -372,7 +376,7 @@
? Show-A (US) - Episode Title S02E09 hdtv
: country: US
episode: 9
format: HDTV
source: HDTV
season: 2
title: Show-A
type: episode
@ -402,23 +406,25 @@
type: movie
? Movies/El Bosque Animado (1987)/El.Bosque.Animado.[Jose.Luis.Cuerda.1987].[Xvid-Dvdrip-720 * 432].avi
: format: DVD
: source: DVD
other: Rip
screen_size: 720x432
title: El Bosque Animado
video_codec: XviD
video_codec: Xvid
year: 1987
type: movie
? Movies/El Bosque Animado (1987)/El.Bosque.Animado.[Jose.Luis.Cuerda.1987].[Xvid-Dvdrip-720x432].avi
: format: DVD
: source: DVD
other: Rip
screen_size: 720x432
title: El Bosque Animado
video_codec: XviD
video_codec: Xvid
year: 1987
type: movie
? 2009.shoot.fruit.chan.multi.dvd9.pal
: format: DVD
: source: DVD
language: mul
other: PAL
title: shoot fruit chan
@ -426,7 +432,7 @@
year: 2009
? 2009.shoot.fruit.chan.multi.dvd5.pal
: format: DVD
: source: DVD
language: mul
other: PAL
title: shoot fruit chan
@ -435,25 +441,25 @@
? The.Flash.2014.S01E01.PREAIR.WEBRip.XviD-EVO.avi
: episode: 1
format: WEBRip
other: Preair
source: Web
other: [Preair, Rip]
release_group: EVO
season: 1
title: The Flash
type: episode
video_codec: XviD
video_codec: Xvid
year: 2014
? Ice.Lake.Rebels.S01E06.Ice.Lake.Games.720p.HDTV.x264-DHD
: episode: 6
format: HDTV
source: HDTV
release_group: DHD
screen_size: 720p
season: 1
title: Ice Lake Rebels
episode_title: Ice Lake Games
type: episode
video_codec: h264
video_codec: H.264
? The League - S06E10 - Epi Sexy.mkv
: episode: 10
@ -463,23 +469,23 @@
type: episode
? Stay (2005) [1080p]/Stay.2005.1080p.BluRay.x264.YIFY.mp4
: format: BluRay
: source: Blu-ray
release_group: YIFY
screen_size: 1080p
title: Stay
type: movie
video_codec: h264
video_codec: H.264
year: 2005
? /media/live/A/Anger.Management.S02E82.720p.HDTV.X264-DIMENSION.mkv
: format: HDTV
: source: HDTV
release_group: DIMENSION
screen_size: 720p
title: Anger Management
type: episode
season: 2
episode: 82
video_codec: h264
video_codec: H.264
? "[Figmentos] Monster 34 - At the End of Darkness [781219F1].mkv"
: type: episode
@ -492,7 +498,7 @@
? Game.of.Thrones.S05E07.720p.HDTV-KILLERS.mkv
: type: episode
episode: 7
format: HDTV
source: HDTV
release_group: KILLERS
screen_size: 720p
season: 5
@ -501,7 +507,7 @@
? Game.of.Thrones.S05E07.HDTV.720p-KILLERS.mkv
: type: episode
episode: 7
format: HDTV
source: HDTV
release_group: KILLERS
screen_size: 720p
season: 5
@ -519,8 +525,8 @@
title: Star Trek Into Darkness
year: 2013
screen_size: 720p
format: WEB-DL
video_codec: h264
source: Web
video_codec: H.264
release_group: publichd
? /var/medias/series/The Originals/Season 02/The.Originals.S02E15.720p.HDTV.X264-DIMENSION.mkv
@ -529,8 +535,8 @@
season: 2
episode: 15
screen_size: 720p
format: HDTV
video_codec: h264
source: HDTV
video_codec: H.264
release_group: DIMENSION
? Test.S01E01E07-FooBar-Group.avi
@ -539,202 +545,211 @@
- 1
- 7
episode_title: FooBar-Group # Make sure it doesn't conflict with uuid
mimetype: video/x-msvideo
season: 1
title: Test
type: episode
? TEST.S01E02.2160p.NF.WEBRip.x264.DD5.1-ABC
: audio_channels: '5.1'
audio_codec: DolbyDigital
audio_codec: Dolby Digital
episode: 2
format: WEBRip
other: Netflix
source: Web
other: Rip
release_group: ABC
screen_size: 4K
screen_size: 2160p
season: 1
streaming_service: Netflix
title: TEST
type: episode
video_codec: h264
video_codec: H.264
? TEST.2015.12.30.720p.WEBRip.h264-ABC
: date: 2015-12-30
format: WEBRip
source: Web
other: Rip
release_group: ABC
screen_size: 720p
title: TEST
type: episode
video_codec: h264
video_codec: H.264
? TEST.S01E10.24.1080p.NF.WEBRip.AAC2.0.x264-ABC
: audio_channels: '2.0'
audio_codec: AAC
episode: 10
episode_title: '24'
format: WEBRip
other: Netflix
source: Web
other: Rip
release_group: ABC
screen_size: 1080p
season: 1
streaming_service: Netflix
title: TEST
type: episode
video_codec: h264
video_codec: H.264
? TEST.S01E10.24.1080p.NF.WEBRip.AAC2.0.x264-ABC
: audio_channels: '2.0'
audio_codec: AAC
episode: 10
episode_title: '24'
format: WEBRip
other: Netflix
source: Web
other: Rip
release_group: ABC
screen_size: 1080p
season: 1
streaming_service: Netflix
title: TEST
type: episode
video_codec: h264
video_codec: H.264
? TEST.S01E10.24.1080p.NF.WEBRip.AAC.2.0.x264-ABC
: audio_channels: '2.0'
audio_codec: AAC
episode: 10
episode_title: '24'
format: WEBRip
other: Netflix
source: Web
other: Rip
release_group: ABC
screen_size: 1080p
season: 1
streaming_service: Netflix
title: TEST
type: episode
video_codec: h264
video_codec: H.264
? TEST.S05E02.720p.iP.WEBRip.AAC2.0.H264-ABC
: audio_channels: '2.0'
audio_codec: AAC
episode: 2
format: WEBRip
source: Web
other: Rip
release_group: ABC
screen_size: 720p
season: 5
title: TEST
type: episode
video_codec: h264
video_codec: H.264
? TEST.S03E07.720p.WEBRip.AAC2.0.x264-ABC
: audio_channels: '2.0'
audio_codec: AAC
episode: 7
format: WEBRip
source: Web
other: Rip
release_group: ABC
screen_size: 720p
season: 3
title: TEST
type: episode
video_codec: h264
video_codec: H.264
? TEST.S15E15.24.1080p.FREE.WEBRip.AAC2.0.x264-ABC
: audio_channels: '2.0'
audio_codec: AAC
episode: 15
episode_title: '24'
format: WEBRip
source: Web
other: Rip
release_group: ABC
screen_size: 1080p
season: 15
title: TEST
type: episode
video_codec: h264
video_codec: H.264
? TEST.S11E11.24.720p.ETV.WEBRip.AAC2.0.x264-ABC
: audio_channels: '2.0'
audio_codec: AAC
episode: 11
episode_title: '24'
format: WEBRip
source: Web
other: Rip
release_group: ABC
screen_size: 720p
season: 11
title: TEST
type: episode
video_codec: h264
video_codec: H.264
? TEST.2015.1080p.HC.WEBRip.x264.AAC2.0-ABC
: audio_channels: '2.0'
audio_codec: AAC
format: WEBRip
source: Web
other: Rip
release_group: ABC
screen_size: 1080p
title: TEST
type: movie
video_codec: h264
video_codec: H.264
year: 2015
? TEST.2015.1080p.3D.BluRay.Half-SBS.x264.DTS-HD.MA.7.1-ABC
: audio_channels: '7.1'
audio_codec: DTS
audio_profile: HDMA
format: BluRay
audio_codec: DTS-HD
audio_profile: Master Audio
source: Blu-ray
other: 3D
release_group: ABC
screen_size: 1080p
title: TEST
type: movie
video_codec: h264
video_codec: H.264
year: 2015
? TEST.2015.1080p.3D.BluRay.Half-OU.x264.DTS-HD.MA.7.1-ABC
: audio_channels: '7.1'
audio_codec: DTS
audio_profile: HDMA
format: BluRay
audio_codec: DTS-HD
audio_profile: Master Audio
source: Blu-ray
other: 3D
release_group: ABC
screen_size: 1080p
title: TEST
type: movie
video_codec: h264
video_codec: H.264
year: 2015
? TEST.2015.1080p.3D.BluRay.Half-OU.x264.DTS-HD.MA.TrueHD.7.1.Atmos-ABC
: audio_channels: '7.1'
audio_codec:
- DTS
- TrueHD
- DolbyAtmos
audio_profile: HDMA
format: BluRay
- DTS-HD
- Dolby TrueHD
- Dolby Atmos
audio_profile: Master Audio
source: Blu-ray
other: 3D
release_group: ABC
screen_size: 1080p
title: TEST
type: movie
video_codec: h264
video_codec: H.264
year: 2015
? TEST.2015.1080p.3D.BluRay.Half-SBS.x264.DTS-HD.MA.TrueHD.7.1.Atmos-ABC
: audio_channels: '7.1'
audio_codec:
- DTS
- TrueHD
- DolbyAtmos
audio_profile: HDMA
format: BluRay
- DTS-HD
- Dolby TrueHD
- Dolby Atmos
audio_profile: Master Audio
source: Blu-ray
other: 3D
release_group: ABC
screen_size: 1080p
title: TEST
type: movie
video_codec: h264
video_codec: H.264
year: 2015
? TEST.2015.1080p.BluRay.REMUX.AVC.DTS-HD.MA.TrueHD.7.1.Atmos-ABC
: audio_channels: '7.1'
audio_codec:
- DTS
- TrueHD
- DolbyAtmos
audio_profile: HDMA
format: BluRay
- DTS-HD
- Dolby TrueHD
- Dolby Atmos
audio_profile: Master Audio
source: Blu-ray
other: Remux
release_group: ABC
screen_size: 1080p
@ -743,58 +758,191 @@
year: 2015
? Gangs of New York 2002 REMASTERED 1080p BluRay x264-AVCHD
: format: BluRay
other: Remastered
: source: Blu-ray
edition: Remastered
screen_size: 1080p
title: Gangs of New York
type: movie
video_codec: h264
video_codec: H.264
video_profile: Advanced Video Codec High Definition
year: 2002
? Peep.Show.S06E02.DVDrip.x264-faks86.mkv
: container: mkv
episode: 2
format: DVD
source: DVD
other: Rip
release_group: faks86
season: 6
title: Peep Show
type: episode
video_codec: h264
video_codec: H.264
# Episode title is indeed 'October 8, 2014'
# https://thetvdb.com/?tab=episode&seriesid=82483&seasonid=569935&id=4997362&lid=7
? The Soup - 11x41 - October 8, 2014.mp4
: container: mp4
episode: 41
episode_title: October 8
episode_title: October 8, 2014
season: 11
title: The Soup
type: episode
year: 2014
? Red.Rock.S02E59.WEB-DLx264-JIVE
: episode: 59
season: 2
format: WEB-DL
source: Web
release_group: JIVE
title: Red Rock
type: episode
video_codec: h264
video_codec: H.264
? Pawn.Stars.S12E31.Deals.On.Wheels.PDTVx264-JIVE
: episode: 31
episode_title: Deals On Wheels
season: 12
format: DVB
source: Digital TV
release_group: JIVE
title: Pawn Stars
type: episode
video_codec: h264
video_codec: H.264
? Duck.Dynasty.S09E09.Van.He-llsing.HDTVx264-JIVE
: episode: 9
episode_title: Van He-llsing
season: 9
format: HDTV
source: HDTV
release_group: JIVE
title: Duck Dynasty
type: episode
video_codec: h264
video_codec: H.264
? ATKExotics.16.01.24.Ava.Alba.Watersports.XXX.1080p.MP4-KTR
: title: ATKExotics
episode_title: Ava Alba Watersports
other: XXX
screen_size: 1080p
container: mp4
release_group: KTR
type: episode
? PutaLocura.15.12.22.Spanish.Luzzy.XXX.720p.MP4-oRo
: title: PutaLocura
episode_title: Spanish Luzzy
other: XXX
screen_size: 720p
container: mp4
release_group: oRo
type: episode
? French Maid Services - Lola At Your Service WEB-DL SPLIT SCENES MP4-RARBG
: title: French Maid Services
alternative_title: Lola At Your Service
source: Web
container: mp4
release_group: RARBG
type: movie
? French Maid Services - Lola At Your Service - Marc Dorcel WEB-DL SPLIT SCENES MP4-RARBG
: title: French Maid Services
alternative_title: [Lola At Your Service, Marc Dorcel]
source: Web
container: mp4
release_group: RARBG
type: movie
? PlayboyPlus.com_16.01.23.Eleni.Corfiate.Playboy.Romania.XXX.iMAGESET-OHRLY
: episode_title: Eleni Corfiate Playboy Romania
other: XXX
type: episode
? TeenPornoPass - Anna - Beautiful Ass Deep Penetrated 720p mp4
: title: TeenPornoPass
alternative_title:
- Anna
- Beautiful Ass Deep Penetrated
screen_size: 720p
container: mp4
type: movie
? SexInJeans.Gina.Gerson.Super.Nasty.Asshole.Pounding.With.Gina.In.Jeans.A.Devil.In.Denim.The.Finest.Ass.Fuck.Frolicking.mp4
: title: SexInJeans Gina Gerson Super Nasty Asshole Pounding With Gina In Jeans A Devil In Denim The Finest Ass Fuck Frolicking
container: mp4
type: movie
? TNA Impact Wrestling HDTV 2017-06-22 720p H264 AVCHD-SC-SDH
: title: TNA Impact Wrestling
source: HDTV
date: 2017-06-22
screen_size: 720p
video_codec: H.264
video_profile:
- Advanced Video Codec High Definition
- Scalable Video Coding
release_group: SDH
type: episode
? Katy Perry - Pepsi & Billboard Summer Beats Concert Series 2012 1080i HDTV 20 Mbps DD2.0 MPEG2-TrollHD.ts
: title: Katy Perry
alternative_title: Pepsi & Billboard Summer Beats Concert Series
year: 2012
screen_size: 1080i
source: HDTV
video_bit_rate: 20Mbps
audio_codec: Dolby Digital
audio_channels: '2.0'
video_codec: MPEG-2
release_group: TrollHD
container: ts
? Justin Timberlake - MTV Video Music Awards 2013 1080i 32 Mbps DTS-HD 5.1.ts
: title: Justin Timberlake
alternative_title: MTV Video Music Awards
year: 2013
screen_size: 1080i
video_bit_rate: 32Mbps
audio_codec: DTS-HD
audio_channels: '5.1'
container: ts
type: movie
? Chuck Berry The Very Best Of Chuck Berry(2010)[320 Kbps]
: title: Chuck Berry The Very Best Of Chuck Berry
year: 2010
audio_bit_rate: 320Kbps
type: movie
? Title Name [480p][1.5Mbps][.mp4]
: title: Title Name
screen_size: 480p
video_bit_rate: 1.5Mbps
container: mp4
type: movie
? This.is.Us
: options: --no-default-config
title: This is Us
type: movie
? This.is.Us
: options: --excludes country
title: This is Us
type: movie
? MotoGP.2016x03.USA.Race.BTSportHD.1080p25
: title: MotoGP
season: 2016
year: 2016
episode: 3
screen_size: 1080p
frame_rate: 25fps
type: episode
? BBC.Earth.South.Pacific.2010.D2.1080p.24p.BD25.DTS-HD
: title: BBC Earth South Pacific
year: 2010
screen_size: 1080p
frame_rate: 24fps
source: Blu-ray
audio_codec: DTS-HD
type: movie
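A quick sketch of how the new quantity properties exercised above come back through the API, reusing the MotoGP fixture string from this file (the str() comparison assumes the quantity types render as in the fixtures, e.g. '25fps'):

    from guessit import guessit

    info = guessit('MotoGP.2016x03.USA.Race.BTSportHD.1080p25')
    # '1080p25' is split into a screen size and a frame-rate quantity.
    assert info['screen_size'] == '1080p'
    assert str(info['frame_rate']) == '25fps'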
View file
@ -3,6 +3,7 @@
"""
Options
"""
try:
from collections import OrderedDict
except ImportError: # pragma: no-cover
@ -11,6 +12,8 @@ import babelfish
import yaml
from .rules.common.quantity import BitRate, FrameRate, Size
class OrderedDictYAMLLoader(yaml.Loader):
"""
@ -61,11 +64,18 @@ class CustomDumper(yaml.SafeDumper):
def default_representer(dumper, data):
"""Default representer"""
return dumper.represent_str(str(data))
CustomDumper.add_representer(babelfish.Language, default_representer)
CustomDumper.add_representer(babelfish.Country, default_representer)
CustomDumper.add_representer(BitRate, default_representer)
CustomDumper.add_representer(FrameRate, default_representer)
CustomDumper.add_representer(Size, default_representer)
def ordered_dict_representer(dumper, data):
"""OrderedDict representer"""
return dumper.represent_dict(data)
return dumper.represent_mapping('tag:yaml.org,2002:map', data.items())
CustomDumper.add_representer(OrderedDict, ordered_dict_representer)
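Representing the mapping from data.items() rather than via represent_dict matters because PyYAML sorts plain dict keys when dumping, while an items list is emitted in insertion order, which is the point of using OrderedDict here. A small sketch of the effect, assuming this module is guessit.yamlutils:

    from collections import OrderedDict
    import yaml
    from guessit.yamlutils import CustomDumper

    data = OrderedDict([('zebra', 1), ('alpha', 2)])
    # Keys come out in insertion order, not alphabetically sorted:
    # "zebra: 1\nalpha: 2\n"
    print(yaml.dump(data, Dumper=CustomDumper))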
View file
@ -4,4 +4,4 @@
Version module
"""
# pragma: no cover
__version__ = '0.7.7.dev0'
__version__ = '1.0.0'
View file
@ -24,7 +24,7 @@ class Chain(Pattern):
Definition of a pattern chain to search for.
"""
def __init__(self, rebulk, **kwargs):
def __init__(self, rebulk, chain_breaker=None, **kwargs):
call(super(Chain, self).__init__, **kwargs)
self._kwargs = kwargs
self._match_kwargs = filter_match_kwargs(kwargs)
@ -32,6 +32,10 @@ class Chain(Pattern):
self._regex_defaults = {}
self._string_defaults = {}
self._functional_defaults = {}
if callable(chain_breaker):
self.chain_breaker = chain_breaker
else:
self.chain_breaker = None
self.rebulk = rebulk
self.parts = []
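As _chain_breaker_eval further down shows, the new hook receives the Matches accumulated for the chain so far and returns truthy to stop extending the chain. A minimal sketch of wiring one up; the predicate is illustrative, and it assumes Rebulk.chain forwards the keyword argument to Chain:

    from rebulk import Rebulk

    def stop_after_season_and_episode(matches):
        # A truthy return breaks the chain: stop once both parts are present.
        return matches.named('season') and matches.named('episode')

    rebulk = Rebulk()
    chain = rebulk.chain(chain_breaker=stop_after_season_and_episode)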
@ -161,10 +165,12 @@ class Chain(Pattern):
return self.rebulk
def _match(self, pattern, input_string, context=None):
# pylint: disable=too-many-locals,too-many-nested-blocks
chain_matches = []
chain_input_string = input_string
offset = 0
while offset < len(input_string):
chain_found = False
current_chain_matches = []
valid_chain = True
is_chain_start = True
@ -173,21 +179,39 @@ class Chain(Pattern):
chain_part_matches, raw_chain_part_matches = Chain._match_chain_part(is_chain_start, chain_part,
chain_input_string,
context)
Chain._fix_matches_offset(chain_part_matches, input_string, offset)
Chain._fix_matches_offset(raw_chain_part_matches, input_string, offset)
if raw_chain_part_matches:
Chain._fix_matches_offset(raw_chain_part_matches, input_string, offset)
offset = raw_chain_part_matches[-1].raw_end
chain_input_string = input_string[offset:]
if not chain_part.is_hidden:
current_chain_matches.extend(chain_part_matches)
grouped_matches_dict = dict()
for match_index, match in itertools.groupby(chain_part_matches,
lambda m: m.match_index):
grouped_matches_dict[match_index] = list(match)
grouped_raw_matches_dict = dict()
for match_index, raw_match in itertools.groupby(raw_chain_part_matches,
lambda m: m.match_index):
grouped_raw_matches_dict[match_index] = list(raw_match)
for match_index, grouped_raw_matches in grouped_raw_matches_dict.items():
chain_found = True
offset = grouped_raw_matches[-1].raw_end
chain_input_string = input_string[offset:]
if not chain_part.is_hidden:
grouped_matches = grouped_matches_dict.get(match_index, [])
if self._chain_breaker_eval(current_chain_matches + grouped_matches):
current_chain_matches.extend(grouped_matches)
except _InvalidChainException:
valid_chain = False
if current_chain_matches:
offset = current_chain_matches[0].raw_end
break
is_chain_start = False
if not current_chain_matches:
if not chain_found:
break
if valid_chain:
if current_chain_matches and valid_chain:
match = self._build_chain_match(current_chain_matches, input_string)
chain_matches.append(match)
@ -244,6 +268,9 @@ class Chain(Pattern):
chain_match.parent = match
return match
def _chain_breaker_eval(self, matches):
return not self.chain_breaker or not self.chain_breaker(Matches(matches))
@staticmethod
def _fix_matches_offset(chain_part_matches, input_string, offset):
for chain_part_match in chain_part_matches:
@ -273,14 +300,14 @@ class Chain(Pattern):
if not is_chain_start:
separator = chain_input_string[0:chain_part_matches[0].initiator.raw_start]
if len(separator) > 0:
if separator:
return []
j = 1
for i in range(0, len(chain_part_matches) - 1):
separator = chain_input_string[chain_part_matches[i].initiator.raw_end:
chain_part_matches[i + 1].initiator.raw_start]
if len(separator) > 0:
if separator:
break
j += 1
truncated = chain_part_matches[:j]
Some files were not shown because too many files have changed in this diff