Update vendored guessit to 3.1.1

Updates python-dateutil to 2.8.2
Updates rebulk to 2.0.1
This commit is contained in:
Labrys of Knossos 2022-11-28 19:44:46 -05:00
commit 2226a74ef8
66 changed files with 2995 additions and 1306 deletions

View file

@ -142,7 +142,7 @@ def main(args=None): # pylint:disable=too-many-branches
if options.get('yaml'):
try:
import yaml # pylint:disable=unused-variable
import yaml # pylint:disable=unused-variable,unused-import
except ImportError: # pragma: no cover
del options['yaml']
print('PyYAML is not installed. \'--yaml\' option will be ignored ...', file=sys.stderr)

View file

@ -4,4 +4,4 @@
Version module
"""
# pragma: no cover
__version__ = '3.0.3'
__version__ = '3.1.1'

View file

@ -82,6 +82,19 @@ def properties(options=None):
return default_api.properties(options)
def suggested_expected(titles, options=None):
    """
    Return a list of suggested titles to be used as `expected_title` based on the list of titles.

    Module-level convenience wrapper that delegates to the shared ``default_api`` instance.

    :param titles: the filenames or release names to analyze
    :type titles: list|set|dict
    :param options: guessit options to use when parsing each title
    :type options: str|dict
    :return: subset of ``titles`` suggested for use as ``expected_title``
    :rtype: list of str
    """
    return default_api.suggested_expected(titles, options)
class GuessItApi(object):
"""
An api class that can be configured with custom Rebulk configuration.
@ -228,5 +241,23 @@ class GuessItApi(object):
ordered = self.rebulk.customize_properties(ordered)
return ordered
def suggested_expected(self, titles, options=None):
"""
Return a list of suggested titles to be used as `expected_title` based on the list of titles
:param titles: the filename or release name
:type titles: list|set|dict
:param options:
:type options: str|dict
:return:
:rtype: list of str
"""
suggested = []
for title in titles:
guess = self.guessit(title, options)
if len(guess) != 2 or 'title' not in guess:
suggested.append(title)
return suggested
default_api = GuessItApi()

View file

@ -4,7 +4,7 @@
Backports
"""
# pragma: no-cover
# pylint: disabled
# pylint: skip-file
def cmp_to_key(mycmp):
"""functools.cmp_to_key backport"""

View file

@ -1,18 +1,19 @@
{
"expected_title": [
"OSS 117"
"OSS 117",
"This is Us"
],
"allowed_countries": [
"au",
"us",
"gb"
"gb",
"us"
],
"allowed_languages": [
"ca",
"cs",
"de",
"en",
"es",
"ca",
"cs",
"fr",
"he",
"hi",
@ -20,7 +21,9 @@
"it",
"ja",
"ko",
"mul",
"nl",
"no",
"pl",
"pt",
"ro",
@ -28,18 +31,50 @@
"sv",
"te",
"uk",
"mul",
"und"
],
"advanced_config": {
"common_words": [
"ca",
"cat",
"de",
"it"
"he",
"it",
"no",
"por",
"rum",
"se",
"st",
"sub"
],
"groups": {
"starting": "([{",
"ending": ")]}"
},
"audio_codec": {
"audio_channels": {
"1.0": [
"1ch",
"mono"
],
"2.0": [
"2ch",
"stereo",
"re:(2[\\W_]0(?:ch)?)(?=[^\\d]|$)"
],
"5.1": [
"5ch",
"6ch",
"re:(5[\\W_][01](?:ch)?)(?=[^\\d]|$)",
"re:(6[\\W_]0(?:ch)?)(?=[^\\d]|$)"
],
"7.1": [
"7ch",
"8ch",
"re:(7[\\W_][01](?:ch)?)(?=[^\\d]|$)"
]
}
},
"container": {
"subtitles": [
"srt",
@ -59,9 +94,10 @@
"avi",
"divx",
"flv",
"mk3d",
"iso",
"m4v",
"mk2",
"mk3d",
"mka",
"mkv",
"mov",
@ -77,12 +113,11 @@
"ram",
"rm",
"ts",
"vob",
"wav",
"webm",
"wma",
"wmv",
"iso",
"vob"
"wmv"
],
"torrent": [
"torrent"
@ -255,7 +290,6 @@
],
"subtitle_prefixes": [
"st",
"v",
"vost",
"subforced",
"fansub",
@ -297,12 +331,12 @@
},
"release_group": {
"forbidden_names": [
"rip",
"bonus",
"by",
"for",
"par",
"pour",
"bonus"
"rip"
],
"ignored_seps": "[]{}()"
},
@ -311,6 +345,7 @@
"23.976",
"24",
"25",
"29.970",
"30",
"48",
"50",
@ -329,6 +364,7 @@
"progressive": [
"360",
"480",
"540",
"576",
"900",
"1080",
@ -342,8 +378,8 @@
"website": {
"safe_tlds": [
"com",
"org",
"net"
"net",
"org"
],
"safe_subdomains": [
"www"
@ -351,12 +387,200 @@
"safe_prefixes": [
"co",
"com",
"org",
"net"
"net",
"org"
],
"prefixes": [
"from"
]
},
"streaming_service": {
"A&E": [
"AE",
"A&E"
],
"ABC": "AMBC",
"ABC Australia": "AUBC",
"Al Jazeera English": "AJAZ",
"AMC": "AMC",
"Amazon Prime": [
"AMZN",
"Amazon",
"re:Amazon-?Prime"
],
"Adult Swim": [
"AS",
"re:Adult-?Swim"
],
"America's Test Kitchen": "ATK",
"Animal Planet": "ANPL",
"AnimeLab": "ANLB",
"AOL": "AOL",
"ARD": "ARD",
"BBC iPlayer": [
"iP",
"re:BBC-?iPlayer"
],
"BravoTV": "BRAV",
"Canal+": "CNLP",
"Cartoon Network": "CN",
"CBC": "CBC",
"CBS": "CBS",
"CNBC": "CNBC",
"Comedy Central": [
"CC",
"re:Comedy-?Central"
],
"Channel 4": "4OD",
"CHRGD": "CHGD",
"Cinemax": "CMAX",
"Country Music Television": "CMT",
"Comedians in Cars Getting Coffee": "CCGC",
"Crunchy Roll": [
"CR",
"re:Crunchy-?Roll"
],
"Crackle": "CRKL",
"CSpan": "CSPN",
"CTV": "CTV",
"CuriosityStream": "CUR",
"CWSeed": "CWS",
"Daisuki": "DSKI",
"DC Universe": "DCU",
"Deadhouse Films": "DHF",
"DramaFever": [
"DF",
"DramaFever"
],
"Digiturk Diledigin Yerde": "DDY",
"Discovery": [
"DISC",
"Discovery"
],
"Disney": [
"DSNY",
"Disney"
],
"DIY Network": "DIY",
"Doc Club": "DOCC",
"DPlay": "DPLY",
"E!": "ETV",
"ePix": "EPIX",
"El Trece": "ETTV",
"ESPN": "ESPN",
"Esquire": "ESQ",
"Family": "FAM",
"Family Jr": "FJR",
"Food Network": "FOOD",
"Fox": "FOX",
"Freeform": "FREE",
"FYI Network": "FYI",
"Global": "GLBL",
"GloboSat Play": "GLOB",
"Hallmark": "HLMK",
"HBO Go": [
"HBO",
"re:HBO-?Go"
],
"HGTV": "HGTV",
"History": [
"HIST",
"History"
],
"Hulu": "HULU",
"Investigation Discovery": "ID",
"IFC": "IFC",
"iTunes": "iTunes",
"ITV": "ITV",
"Knowledge Network": "KNOW",
"Lifetime": "LIFE",
"Motor Trend OnDemand": "MTOD",
"MBC": [
"MBC",
"MBCVOD"
],
"MSNBC": "MNBC",
"MTV": "MTV",
"National Geographic": [
"NATG",
"re:National-?Geographic"
],
"NBA TV": [
"NBA",
"re:NBA-?TV"
],
"NBC": "NBC",
"Netflix": [
"NF",
"Netflix"
],
"NFL": "NFL",
"NFL Now": "NFLN",
"NHL GameCenter": "GC",
"Nickelodeon": [
"NICK",
"Nickelodeon"
],
"Norsk Rikskringkasting": "NRK",
"OnDemandKorea": [
"ODK",
"OnDemandKorea"
],
"PBS": "PBS",
"PBS Kids": "PBSK",
"Playstation Network": "PSN",
"Pluzz": "PLUZ",
"RTE One": "RTE",
"SBS (AU)": "SBS",
"SeeSo": [
"SESO",
"SeeSo"
],
"Shomi": "SHMI",
"Spike": "SPIK",
"Spike TV": [
"SPKE",
"re:Spike-?TV"
],
"Sportsnet": "SNET",
"Sprout": "SPRT",
"Stan": "STAN",
"Starz": "STZ",
"Sveriges Television": "SVT",
"SwearNet": "SWER",
"Syfy": "SYFY",
"TBS": "TBS",
"TFou": "TFOU",
"The CW": [
"CW",
"re:The-?CW"
],
"TLC": "TLC",
"TubiTV": "TUBI",
"TV3 Ireland": "TV3",
"TV4 Sweeden": "TV4",
"TVING": "TVING",
"TV Land": [
"TVL",
"re:TV-?Land"
],
"UFC": "UFC",
"UKTV": "UKTV",
"Univision": "UNIV",
"USA Network": "USAN",
"Velocity": "VLCT",
"VH1": "VH1",
"Viceland": "VICE",
"Viki": "VIKI",
"Vimeo": "VMEO",
"VRV": "VRV",
"W Network": "WNET",
"WatchMe": "WME",
"WWE Network": "WWEN",
"Xbox Video": "XBOX",
"Yahoo": "YHOO",
"YouTube Red": "RED",
"ZDF": "ZDF"
}
}
}
}

View file

@ -128,7 +128,7 @@ class ConfigurationException(Exception):
"""
Exception related to configuration file.
"""
pass
pass # pylint:disable=unnecessary-pass
def load_config(options):
@ -153,7 +153,7 @@ def load_config(options):
cwd = os.getcwd()
yaml_supported = False
try:
import yaml # pylint: disable=unused-variable
import yaml # pylint:disable=unused-variable,unused-import
yaml_supported = True
except ImportError:
pass
@ -252,7 +252,7 @@ def load_config_file(filepath):
try:
import yaml
with open(filepath) as config_file_data:
return yaml.load(config_file_data)
return yaml.load(config_file_data, yaml.SafeLoader)
except ImportError: # pragma: no cover
raise ConfigurationException('Configuration file extension is not supported. '
'PyYAML should be installed to support "%s" file' % (

View file

@ -25,7 +25,7 @@ def _potential_before(i, input_string):
:return:
:rtype: bool
"""
return i - 2 >= 0 and input_string[i] in seps and input_string[i - 2] in seps and input_string[i - 1] not in seps
return i - 1 >= 0 and input_string[i] in seps and input_string[i - 2] in seps and input_string[i - 1] not in seps
def _potential_after(i, input_string):

View file

@ -28,7 +28,7 @@ def int_coercable(string):
return False
def compose(*validators):
def and_(*validators):
"""
Compose validators functions
:param validators:
@ -49,3 +49,26 @@ def compose(*validators):
return False
return True
return composed
def or_(*validators):
    """
    Combine validator functions with a logical OR.

    :param validators: callables accepting a string and returning a truthy/falsy result
    :type validators: callable
    :return: a validator that returns True when at least one given validator accepts the string
    :rtype: callable
    """
    def combined(string):
        """
        Return True when any of the wrapped validators accepts ``string``.

        :param string: the value to validate
        :type string: str
        :return: whether at least one validator passed
        :rtype: bool
        """
        # any() short-circuits on the first passing validator, matching
        # an early-return loop; an empty validator list yields False.
        return any(validator(string) for validator in validators)
    return combined

View file

@ -0,0 +1,20 @@
"""
Match processors
"""
from guessit.rules.common import seps
def strip(match, chars=seps):
    """
    Strip given characters from match.

    Mutates ``match.start`` and ``match.end`` in place so the match no longer
    begins or ends with any character in ``chars``.

    :param chars: characters to strip (defaults to the common separator set ``seps``)
    :param match: the match to adjust  # assumed to be a rebulk Match — TODO confirm
    :return: False if the match became empty after stripping (presumably signalling
        the caller to reject the match — verify against rebulk processor conventions),
        otherwise None
    """
    # NOTE(review): if the matched span consists entirely of ``chars``, these loops
    # can walk past the span and potentially raise IndexError — confirm callers
    # never pass all-separator matches.
    while match.input_string[match.start] in chars:
        match.start += 1
    while match.input_string[match.end - 1] in chars:
        match.end -= 1
    if not match:
        return False

View file

@ -34,7 +34,9 @@ class EnlargeGroupMatches(CustomRule):
for match in matches.ending(group.end - 1):
ending.append(match)
return starting, ending
if starting or ending:
return starting, ending
return False
def then(self, matches, when_response, context):
starting, ending = when_response

View file

@ -3,9 +3,8 @@
"""
audio_codec, audio_profile and audio_channels property
"""
from rebulk.remodule import re
from rebulk import Rebulk, Rule, RemoveMatch
from rebulk.remodule import re
from ..common import dash
from ..common.pattern import is_disabled
@ -23,7 +22,9 @@ def audio_codec(config): # pylint:disable=unused-argument
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
rebulk = Rebulk()\
.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])\
.string_defaults(ignore_case=True)
def audio_codec_priority(match1, match2):
"""
@ -61,7 +62,9 @@ def audio_codec(config): # pylint:disable=unused-argument
rebulk.string('PCM', value='PCM')
rebulk.string('LPCM', value='LPCM')
rebulk.defaults(name='audio_profile', disabled=lambda context: is_disabled(context, 'audio_profile'))
rebulk.defaults(clear=True,
name='audio_profile',
disabled=lambda context: is_disabled(context, 'audio_profile'))
rebulk.string('MA', value='Master Audio', tags=['audio_profile.rule', 'DTS-HD'])
rebulk.string('HR', 'HRA', value='High Resolution Audio', tags=['audio_profile.rule', 'DTS-HD'])
rebulk.string('ES', value='Extended Surround', tags=['audio_profile.rule', 'DTS'])
@ -70,17 +73,19 @@ def audio_codec(config): # pylint:disable=unused-argument
rebulk.string('HQ', value='High Quality', tags=['audio_profile.rule', 'Dolby Digital'])
rebulk.string('EX', value='EX', tags=['audio_profile.rule', 'Dolby Digital'])
rebulk.defaults(name="audio_channels", disabled=lambda context: is_disabled(context, 'audio_channels'))
rebulk.regex(r'(7[\W_][01](?:ch)?)(?=[^\d]|$)', value='7.1', children=True)
rebulk.regex(r'(5[\W_][01](?:ch)?)(?=[^\d]|$)', value='5.1', children=True)
rebulk.regex(r'(2[\W_]0(?:ch)?)(?=[^\d]|$)', value='2.0', children=True)
rebulk.defaults(clear=True,
name="audio_channels",
disabled=lambda context: is_disabled(context, 'audio_channels'))
rebulk.regex('7[01]', value='7.1', validator=seps_after, tags='weak-audio_channels')
rebulk.regex('5[01]', value='5.1', validator=seps_after, tags='weak-audio_channels')
rebulk.string('20', value='2.0', validator=seps_after, tags='weak-audio_channels')
rebulk.string('7ch', '8ch', value='7.1')
rebulk.string('5ch', '6ch', value='5.1')
rebulk.string('2ch', 'stereo', value='2.0')
rebulk.string('1ch', 'mono', value='1.0')
for value, items in config.get('audio_channels').items():
for item in items:
if item.startswith('re:'):
rebulk.regex(item[3:], value=value, children=True)
else:
rebulk.string(item, value=value)
rebulk.rules(DtsHDRule, DtsRule, AacRule, DolbyDigitalRule, AudioValidatorRule, HqConflictRule,
AudioChannelsValidatorRule)

View file

@ -69,4 +69,6 @@ class BitRateTypeRule(Rule):
else:
to_rename.append(match)
return to_rename, to_remove
if to_rename or to_remove:
return to_rename, to_remove
return False

View file

@ -26,7 +26,8 @@ def bonus(config): # pylint:disable=unused-argument
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
rebulk.regex(r'x(\d+)', name='bonus', private_parent=True, children=True, formatter=int,
validator={'__parent__': lambda match: seps_surround},
validator={'__parent__': seps_surround},
validate_all=True,
conflict_solver=lambda match, conflicting: match
if conflicting.name in ('video_codec', 'episode') and 'weak-episode' not in conflicting.tags
else '__default__')

View file

@ -44,7 +44,8 @@ def container(config):
rebulk.regex(r'\.'+build_or_pattern(torrent)+'$', exts=torrent, tags=['extension', 'torrent'])
rebulk.regex(r'\.'+build_or_pattern(nzb)+'$', exts=nzb, tags=['extension', 'nzb'])
rebulk.defaults(name='container',
rebulk.defaults(clear=True,
name='container',
validator=seps_surround,
formatter=lambda s: s.lower(),
conflict_solver=lambda match, other: match

View file

@ -10,6 +10,7 @@ from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, RenameMatch, POST_PRO
from ..common import seps, title_seps
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import or_
from ..properties.title import TitleFromPosition, TitleBaseRule
from ..properties.type import TypeProcessor
@ -133,8 +134,7 @@ class EpisodeTitleFromPosition(TitleBaseRule):
def hole_filter(self, hole, matches):
episode = matches.previous(hole,
lambda previous: any(name in previous.names
for name in self.previous_names),
lambda previous: previous.named(*self.previous_names),
0)
crc32 = matches.named('crc32')
@ -179,8 +179,7 @@ class AlternativeTitleReplace(Rule):
predicate=lambda match: 'title' in match.tags, index=0)
if main_title:
episode = matches.previous(main_title,
lambda previous: any(name in previous.names
for name in self.previous_names),
lambda previous: previous.named(*self.previous_names),
0)
crc32 = matches.named('crc32')
@ -249,7 +248,7 @@ class Filepart3EpisodeTitle(Rule):
if season:
hole = matches.holes(subdirectory.start, subdirectory.end,
ignore=lambda match: 'weak-episode' in match.tags,
ignore=or_(lambda match: 'weak-episode' in match.tags, TitleBaseRule.is_ignored),
formatter=cleanup, seps=title_seps, predicate=lambda match: match.value,
index=0)
if hole:
@ -292,7 +291,8 @@ class Filepart2EpisodeTitle(Rule):
season = (matches.range(directory.start, directory.end, lambda match: match.name == 'season', 0) or
matches.range(filename.start, filename.end, lambda match: match.name == 'season', 0))
if season:
hole = matches.holes(directory.start, directory.end, ignore=lambda match: 'weak-episode' in match.tags,
hole = matches.holes(directory.start, directory.end,
ignore=or_(lambda match: 'weak-episode' in match.tags, TitleBaseRule.is_ignored),
formatter=cleanup, seps=title_seps,
predicate=lambda match: match.value, index=0)
if hole:

View file

@ -11,12 +11,13 @@ from rebulk.match import Match
from rebulk.remodule import re
from rebulk.utils import is_iterable
from guessit.rules import match_processors
from guessit.rules.common.numeral import parse_numeral, numeral
from .title import TitleFromPosition
from ..common import dash, alt_dash, seps, seps_no_fs
from ..common.formatters import strip
from ..common.numeral import numeral, parse_numeral
from ..common.pattern import is_disabled
from ..common.validators import compose, seps_surround, seps_before, int_coercable
from ..common.validators import seps_surround, int_coercable, and_
from ...reutils import build_or_pattern
@ -29,17 +30,12 @@ def episodes(config):
:return: Created Rebulk object
:rtype: Rebulk
"""
# pylint: disable=too-many-branches,too-many-statements,too-many-locals
def is_season_episode_disabled(context):
"""Whether season and episode rules should be enabled."""
return is_disabled(context, 'episode') or is_disabled(context, 'season')
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'])
episode_max_range = config['episode_max_range']
season_max_range = config['season_max_range']
def episodes_season_chain_breaker(matches):
"""
Break chains if there's more than 100 offset between two neighbor values.
@ -57,8 +53,6 @@ def episodes(config):
return True
return False
rebulk.chain_defaults(chain_breaker=episodes_season_chain_breaker)
def season_episode_conflict_solver(match, other):
"""
Conflict solver for episode/season patterns
@ -76,7 +70,6 @@ def episodes(config):
if (other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags
and not match.initiator.children.named(match.name + 'Marker')) or (
other.name == 'screen_size' and not int_coercable(other.raw)):
return match
if other.name in ('season', 'episode') and match.initiator != other.initiator:
if (match.initiator.name in ('weak_episode', 'weak_duplicate')
@ -87,21 +80,6 @@ def episodes(config):
return current
return '__default__'
season_words = config['season_words']
episode_words = config['episode_words']
of_words = config['of_words']
all_words = config['all_words']
season_markers = config['season_markers']
season_ep_markers = config['season_ep_markers']
disc_markers = config['disc_markers']
episode_markers = config['episode_markers']
range_separators = config['range_separators']
weak_discrete_separators = list(sep for sep in seps_no_fs if sep not in range_separators)
strong_discrete_separators = config['discrete_separators']
discrete_separators = strong_discrete_separators + weak_discrete_separators
max_range_gap = config['max_range_gap']
def ordering_validator(match):
"""
Validator for season list. They should be in natural order to be validated.
@ -135,65 +113,18 @@ def episodes(config):
lambda m: m.name == property_name + 'Separator')
separator = match.children.previous(current_match,
lambda m: m.name == property_name + 'Separator', 0)
if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
if not 0 < current_match.value - previous_match.value <= max_range_gap + 1:
valid = False
if separator.raw in strong_discrete_separators:
valid = True
break
if separator:
if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
if not 0 < current_match.value - previous_match.value <= max_range_gap + 1:
valid = False
if separator.raw in strong_discrete_separators:
valid = True
break
previous_match = current_match
return valid
return is_consecutive('episode') and is_consecutive('season')
# S01E02, 01x02, S01S02S03
rebulk.chain(formatter={'season': int, 'episode': int},
tags=['SxxExx'],
abbreviations=[alt_dash],
children=True,
private_parent=True,
validate_all=True,
validator={'__parent__': ordering_validator},
conflict_solver=season_episode_conflict_solver,
disabled=is_season_episode_disabled) \
.regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)@?' +
build_or_pattern(episode_markers + disc_markers, name='episodeMarker') + r'@?(?P<episode>\d+)',
validate_all=True,
validator={'__parent__': seps_before}).repeater('+') \
.regex(build_or_pattern(episode_markers + disc_markers + discrete_separators + range_separators,
name='episodeSeparator',
escape=True) +
r'(?P<episode>\d+)').repeater('*') \
.chain() \
.regex(r'(?P<season>\d+)@?' +
build_or_pattern(season_ep_markers, name='episodeMarker') +
r'@?(?P<episode>\d+)',
validate_all=True,
validator={'__parent__': seps_before}) \
.chain() \
.regex(r'(?P<season>\d+)@?' +
build_or_pattern(season_ep_markers, name='episodeMarker') +
r'@?(?P<episode>\d+)',
validate_all=True,
validator={'__parent__': seps_before}) \
.regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
name='episodeSeparator',
escape=True) +
r'(?P<episode>\d+)').repeater('*') \
.chain() \
.regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)',
validate_all=True,
validator={'__parent__': seps_before}) \
.regex(build_or_pattern(season_markers + discrete_separators + range_separators,
name='seasonSeparator',
escape=True) +
r'(?P<season>\d+)').repeater('*')
# episode_details property
for episode_detail in ('Special', 'Pilot', 'Unaired', 'Final'):
rebulk.string(episode_detail, value=episode_detail, name='episode_details',
disabled=lambda context: is_disabled(context, 'episode_details'))
def validate_roman(match):
"""
Validate a roman match if surrounded by separators
@ -206,117 +137,203 @@ def episodes(config):
return True
return seps_surround(match)
season_words = config['season_words']
episode_words = config['episode_words']
of_words = config['of_words']
all_words = config['all_words']
season_markers = config['season_markers']
season_ep_markers = config['season_ep_markers']
disc_markers = config['disc_markers']
episode_markers = config['episode_markers']
range_separators = config['range_separators']
weak_discrete_separators = list(sep for sep in seps_no_fs if sep not in range_separators)
strong_discrete_separators = config['discrete_separators']
discrete_separators = strong_discrete_separators + weak_discrete_separators
episode_max_range = config['episode_max_range']
season_max_range = config['season_max_range']
max_range_gap = config['max_range_gap']
rebulk = Rebulk() \
.regex_defaults(flags=re.IGNORECASE) \
.string_defaults(ignore_case=True) \
.chain_defaults(chain_breaker=episodes_season_chain_breaker) \
.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
formatter={'season': int, 'episode': int, 'version': int, 'count': int},
children=True,
private_parent=True,
conflict_solver=season_episode_conflict_solver,
abbreviations=[alt_dash])
# S01E02, 01x02, S01S02S03
rebulk.chain(
tags=['SxxExx'],
validate_all=True,
validator={'__parent__': and_(seps_surround, ordering_validator)},
disabled=is_season_episode_disabled) \
.defaults(tags=['SxxExx']) \
.regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)@?' +
build_or_pattern(episode_markers + disc_markers, name='episodeMarker') + r'@?(?P<episode>\d+)')\
.repeater('+') \
.regex(build_or_pattern(episode_markers + disc_markers + discrete_separators + range_separators,
name='episodeSeparator',
escape=True) +
r'(?P<episode>\d+)').repeater('*')
rebulk.chain(tags=['SxxExx'],
validate_all=True,
validator={'__parent__': and_(seps_surround, ordering_validator)},
disabled=is_season_episode_disabled) \
.defaults(tags=['SxxExx']) \
.regex(r'(?P<season>\d+)@?' +
build_or_pattern(season_ep_markers, name='episodeMarker') +
r'@?(?P<episode>\d+)').repeater('+') \
rebulk.chain(tags=['SxxExx'],
validate_all=True,
validator={'__parent__': and_(seps_surround, ordering_validator)},
disabled=is_season_episode_disabled) \
.defaults(tags=['SxxExx']) \
.regex(r'(?P<season>\d+)@?' +
build_or_pattern(season_ep_markers, name='episodeMarker') +
r'@?(?P<episode>\d+)') \
.regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
name='episodeSeparator',
escape=True) +
r'(?P<episode>\d+)').repeater('*')
rebulk.chain(tags=['SxxExx'],
validate_all=True,
validator={'__parent__': and_(seps_surround, ordering_validator)},
disabled=is_season_episode_disabled) \
.defaults(tags=['SxxExx']) \
.regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)') \
.regex('(?P<other>Extras)', name='other', value='Extras', tags=['no-release-group-prefix']).repeater('?') \
.regex(build_or_pattern(season_markers + discrete_separators + range_separators,
name='seasonSeparator',
escape=True) +
r'(?P<season>\d+)').repeater('*')
# episode_details property
for episode_detail in ('Special', 'Pilot', 'Unaired', 'Final'):
rebulk.string(episode_detail,
private_parent=False,
children=False,
value=episode_detail,
name='episode_details',
disabled=lambda context: is_disabled(context, 'episode_details'))
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True,
validate_all=True,
validator={'__parent__': and_(seps_surround, ordering_validator)},
children=True,
private_parent=True,
conflict_solver=season_episode_conflict_solver)
rebulk.chain(abbreviations=[alt_dash],
rebulk.chain(validate_all=True,
conflict_solver=season_episode_conflict_solver,
formatter={'season': parse_numeral, 'count': parse_numeral},
validator={'__parent__': compose(seps_surround, ordering_validator),
validator={'__parent__': and_(seps_surround, ordering_validator),
'season': validate_roman,
'count': validate_roman},
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'season')) \
.defaults(validator=None) \
.defaults(formatter={'season': parse_numeral, 'count': parse_numeral},
validator={'season': validate_roman, 'count': validate_roman},
conflict_solver=season_episode_conflict_solver) \
.regex(build_or_pattern(season_words, name='seasonMarker') + '@?(?P<season>' + numeral + ')') \
.regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
.regex(r'@?' + build_or_pattern(range_separators + discrete_separators + ['@'],
name='seasonSeparator', escape=True) +
r'@?(?P<season>\d+)').repeater('*')
rebulk.defaults(abbreviations=[dash])
rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>\d+)' +
r'(?:v(?P<version>\d+))?' +
r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?', # Episode 4
abbreviations=[dash], formatter={'episode': int, 'version': int, 'count': int},
disabled=lambda context: context.get('type') == 'episode' or is_disabled(context, 'episode'))
rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>' + numeral + ')' +
r'(?:v(?P<version>\d+))?' +
r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?', # Episode 4
abbreviations=[dash],
validator={'episode': validate_roman},
formatter={'episode': parse_numeral, 'version': int, 'count': int},
formatter={'episode': parse_numeral},
disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode'))
rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')',
tags=['SxxExx'],
abbreviations=[dash],
validator=None,
formatter={'season': int, 'other': lambda match: 'Complete'},
formatter={'other': lambda match: 'Complete'},
disabled=lambda context: is_disabled(context, 'season'))
# 12, 13
rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int},
rebulk.chain(tags=['weak-episode'],
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
.defaults(validator=None) \
.defaults(validator=None, tags=['weak-episode']) \
.regex(r'(?P<episode>\d{2})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*')
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})', abbreviations=None).repeater('*')
# 012, 013
rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int},
rebulk.chain(tags=['weak-episode'],
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
.defaults(validator=None) \
.defaults(validator=None, tags=['weak-episode']) \
.regex(r'0(?P<episode>\d{1,2})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})').repeater('*')
.regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})', abbreviations=None).repeater('*')
# 112, 113
rebulk.chain(tags=['weak-episode'],
formatter={'episode': int, 'version': int},
name='weak_episode',
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
.defaults(validator=None) \
.defaults(validator=None, tags=['weak-episode'], name='weak_episode') \
.regex(r'(?P<episode>\d{3,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})').repeater('*')
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})', abbreviations=None).repeater('*')
# 1, 2, 3
rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int},
rebulk.chain(tags=['weak-episode'],
disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode')) \
.defaults(validator=None) \
.defaults(validator=None, tags=['weak-episode']) \
.regex(r'(?P<episode>\d)') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})').repeater('*')
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})', abbreviations=None).repeater('*')
# e112, e113, 1e18, 3e19
# TODO: Enhance rebulk for validator to be used globally (season_episode_validator)
rebulk.chain(formatter={'season': int, 'episode': int, 'version': int},
disabled=lambda context: is_disabled(context, 'episode')) \
rebulk.chain(disabled=lambda context: is_disabled(context, 'episode')) \
.defaults(validator=None) \
.regex(r'(?P<season>\d{1,2})?(?P<episodeMarker>e)(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})', abbreviations=None).repeater('*')
# ep 112, ep113, ep112, ep113
rebulk.chain(abbreviations=[dash], formatter={'episode': int, 'version': int},
disabled=lambda context: is_disabled(context, 'episode')) \
rebulk.chain(disabled=lambda context: is_disabled(context, 'episode')) \
.defaults(validator=None) \
.regex(r'ep-?(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})').repeater('*')
.regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})', abbreviations=None).repeater('*')
# cap 112, cap 112_114
rebulk.chain(abbreviations=[dash],
tags=['see-pattern'],
formatter={'season': int, 'episode': int},
rebulk.chain(tags=['see-pattern'],
disabled=is_season_episode_disabled) \
.defaults(validator=None) \
.defaults(validator=None, tags=['see-pattern']) \
.regex(r'(?P<seasonMarker>cap)-?(?P<season>\d{1,2})(?P<episode>\d{2})') \
.regex(r'(?P<episodeSeparator>-)(?P<season>\d{1,2})(?P<episode>\d{2})').repeater('?')
# 102, 0102
rebulk.chain(tags=['weak-episode', 'weak-duplicate'],
formatter={'season': int, 'episode': int, 'version': int},
name='weak_duplicate',
conflict_solver=season_episode_conflict_solver,
disabled=lambda context: (context.get('episode_prefer_number', False) or
context.get('type') == 'movie') or is_season_episode_disabled(context)) \
.defaults(validator=None) \
.defaults(tags=['weak-episode', 'weak-duplicate'],
name='weak_duplicate',
validator=None,
conflict_solver=season_episode_conflict_solver) \
.regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})').repeater('*')
.regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})', abbreviations=None).repeater('*')
rebulk.regex(r'v(?P<version>\d+)', children=True, private_parent=True, formatter=int,
rebulk.regex(r'v(?P<version>\d+)',
formatter=int,
disabled=lambda context: is_disabled(context, 'version'))
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])
@ -325,18 +342,23 @@ def episodes(config):
# detached of X count (season/episode)
rebulk.regex(r'(?P<episode>\d+)-?' + build_or_pattern(of_words) +
r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
abbreviations=[dash], children=True, private_parent=True, formatter=int,
formatter=int,
pre_match_processor=match_processors.strip,
disabled=lambda context: is_disabled(context, 'episode'))
rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode",
rebulk.regex(r'Minisodes?',
children=False,
private_parent=False,
name='episode_format',
value="Minisode",
disabled=lambda context: is_disabled(context, 'episode_format'))
rebulk.rules(WeakConflictSolver, RemoveInvalidSeason, RemoveInvalidEpisode,
SeePatternRange(range_separators + ['_']),
EpisodeNumberSeparatorRange(range_separators),
SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx,
RemoveWeakDuplicate, EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator,
RemoveWeak, RenameToAbsoluteEpisode, CountValidator, EpisodeSingleDigitValidator, RenameToDiscMatch)
SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx, RemoveWeakDuplicate,
EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator, RemoveWeak(episode_words),
RenameToAbsoluteEpisode, CountValidator, EpisodeSingleDigitValidator, RenameToDiscMatch)
return rebulk
@ -416,7 +438,9 @@ class WeakConflictSolver(Rule):
if to_append:
to_remove.extend(weak_dup_matches)
return to_remove, to_append
if to_remove or to_append:
return to_remove, to_append
return False
class CountValidator(Rule):
@ -442,7 +466,9 @@ class CountValidator(Rule):
season_count.append(count)
else:
to_remove.append(count)
return to_remove, episode_count, season_count
if to_remove or episode_count or season_count:
return to_remove, episode_count, season_count
return False
class SeePatternRange(Rule):
@ -477,7 +503,9 @@ class SeePatternRange(Rule):
to_remove.append(separator)
return to_remove, to_append
if to_remove or to_append:
return to_remove, to_append
return False
class AbstractSeparatorRange(Rule):
@ -533,7 +561,9 @@ class AbstractSeparatorRange(Rule):
previous_match = next_match
return to_remove, to_append
if to_remove or to_append:
return to_remove, to_append
return False
class RenameToAbsoluteEpisode(Rule):
@ -629,20 +659,41 @@ class RemoveWeak(Rule):
Remove weak-episode matches which appears after video, source, and audio matches.
"""
priority = 16
consequence = RemoveMatch
consequence = RemoveMatch, AppendMatch
def __init__(self, episode_words):
super(RemoveWeak, self).__init__()
self.episode_words = episode_words
def when(self, matches, context):
to_remove = []
to_append = []
for filepart in matches.markers.named('path'):
weaks = matches.range(filepart.start, filepart.end, predicate=lambda m: 'weak-episode' in m.tags)
if weaks:
previous = matches.previous(weaks[0], predicate=lambda m: m.name in (
weak = weaks[0]
previous = matches.previous(weak, predicate=lambda m: m.name in (
'audio_codec', 'screen_size', 'streaming_service', 'source', 'video_profile',
'audio_channels', 'audio_profile'), index=0)
if previous and not matches.holes(
previous.end, weaks[0].start, predicate=lambda m: m.raw.strip(seps)):
previous.end, weak.start, predicate=lambda m: m.raw.strip(seps)):
if previous.raw.lower() in self.episode_words:
try:
episode = copy.copy(weak)
episode.name = 'episode'
episode.value = int(weak.value)
episode.start = previous.start
episode.private = False
episode.tags = []
to_append.append(episode)
except ValueError:
pass
to_remove.extend(weaks)
return to_remove
if to_remove or to_append:
return to_remove, to_append
return False
class RemoveWeakIfSxxExx(Rule):
@ -856,4 +907,6 @@ class RenameToDiscMatch(Rule):
markers.append(marker)
discs.extend(sorted(marker.initiator.children.named('episode'), key=lambda m: m.value))
return discs, markers, to_remove
if discs or markers or to_remove:
return discs, markers, to_remove
return False

View file

@ -72,6 +72,8 @@ def language(config, common_words):
UNDETERMINED = babelfish.Language('und')
MULTIPLE = babelfish.Language('mul')
NON_SPECIFIC_LANGUAGES = frozenset([UNDETERMINED, MULTIPLE])
class GuessitConverter(babelfish.LanguageReverseConverter): # pylint: disable=missing-docstring
@ -388,7 +390,9 @@ class SubtitlePrefixLanguageRule(Rule):
to_remove.extend(matches.conflicting(lang))
if prefix in to_remove:
to_remove.remove(prefix)
return to_rename, to_remove
if to_rename or to_remove:
return to_rename, to_remove
return False
def then(self, matches, when_response, context):
to_rename, to_remove = when_response
@ -425,7 +429,9 @@ class SubtitleSuffixLanguageRule(Rule):
to_append.append(lang)
if suffix in to_remove:
to_remove.remove(suffix)
return to_append, to_remove
if to_append or to_remove:
return to_append, to_remove
return False
def then(self, matches, when_response, context):
to_rename, to_remove = when_response
@ -478,6 +484,7 @@ class RemoveInvalidLanguages(Rule):
"""Remove language matches that matches the blacklisted common words."""
consequence = RemoveMatch
priority = 32
def __init__(self, common_words):
"""Constructor."""

View file

@ -11,7 +11,7 @@ from rebulk.remodule import re
from ..common import dash
from ..common import seps
from ..common.pattern import is_disabled
from ..common.validators import seps_after, seps_before, seps_surround, compose
from ..common.validators import seps_after, seps_before, seps_surround, and_
from ...reutils import build_or_pattern
from ...rules.common.formatters import raw_cleanup
@ -35,11 +35,16 @@ def other(config): # pylint:disable=unused-argument,too-many-statements
rebulk.regex('ws', 'wide-?screen', value='Widescreen')
rebulk.regex('Re-?Enc(?:oded)?', value='Reencoded')
rebulk.string('Proper', 'Repack', 'Rerip', value='Proper',
rebulk.string('Repack', 'Rerip', value='Proper',
tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.string('Proper', value='Proper',
tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix'])
rebulk.regex('Real-Proper', 'Real-Repack', 'Real-Rerip', value='Proper',
tags=['streaming_service.prefix', 'streaming_service.suffix', 'real'])
rebulk.regex('Real', value='Proper',
tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix', 'real'])
rebulk.string('Fix', 'Fixed', value='Fix', tags=['has-neighbor-before', 'has-neighbor-after',
'streaming_service.prefix', 'streaming_service.suffix'])
rebulk.string('Dirfix', 'Nfofix', 'Prooffix', value='Fix',
@ -72,16 +77,18 @@ def other(config): # pylint:disable=unused-argument,too-many-statements
private_names=['completeArticle', 'completeWordsBefore', 'completeWordsAfter'],
value={'other': 'Complete'},
tags=['release-group-prefix'],
validator={'__parent__': compose(seps_surround, validate_complete)})
validator={'__parent__': and_(seps_surround, validate_complete)})
rebulk.string('R5', value='Region 5')
rebulk.string('RC', value='Region C')
rebulk.regex('Pre-?Air', value='Preair')
rebulk.regex('(?:PS-?)?Vita', value='PS Vita')
rebulk.regex('(?:PS-?)Vita', value='PS Vita')
rebulk.regex('Vita', value='PS Vita', tags='has-neighbor')
rebulk.regex('(HD)(?P<another>Rip)', value={'other': 'HD', 'another': 'Rip'},
private_parent=True, children=True, validator={'__parent__': seps_surround}, validate_all=True)
for value in ('Screener', 'Remux', '3D', 'PAL', 'SECAM', 'NTSC', 'XXX'):
for value in ('Screener', 'Remux', 'PAL', 'SECAM', 'NTSC', 'XXX'):
rebulk.string(value, value=value)
rebulk.string('3D', value='3D', tags='has-neighbor')
rebulk.string('HQ', value='High Quality', tags='uhdbluray-neighbor')
rebulk.string('HR', value='High Resolution')
@ -90,6 +97,7 @@ def other(config): # pylint:disable=unused-argument,too-many-statements
rebulk.string('mHD', 'HDLight', value='Micro HD')
rebulk.string('LDTV', value='Low Definition')
rebulk.string('HFR', value='High Frame Rate')
rebulk.string('VFR', value='Variable Frame Rate')
rebulk.string('HD', value='HD', validator=None,
tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.regex('Full-?HD', 'FHD', value='Full HD', validator=None,
@ -128,13 +136,15 @@ def other(config): # pylint:disable=unused-argument,too-many-statements
rebulk.regex('BT-?2020', value='BT.2020', tags='uhdbluray-neighbor')
rebulk.string('Sample', value='Sample', tags=['at-end', 'not-a-release-group'])
rebulk.string('Extras', value='Extras', tags='has-neighbor')
rebulk.regex('Digital-?Extras?', value='Extras')
rebulk.string('Proof', value='Proof', tags=['at-end', 'not-a-release-group'])
rebulk.string('Obfuscated', 'Scrambled', value='Obfuscated', tags=['at-end', 'not-a-release-group'])
rebulk.string('xpost', 'postbot', 'asrequested', value='Repost', tags='not-a-release-group')
rebulk.rules(RenameAnotherToOther, ValidateHasNeighbor, ValidateHasNeighborAfter, ValidateHasNeighborBefore,
ValidateScreenerRule, ValidateMuxRule, ValidateHardcodedSubs, ValidateStreamingServiceNeighbor,
ValidateAtEnd, ProperCountRule)
ValidateAtEnd, ValidateReal, ProperCountRule)
return rebulk
@ -354,3 +364,20 @@ class ValidateAtEnd(Rule):
to_remove.append(match)
return to_remove
class ValidateReal(Rule):
    """
    Discard 'Real'-tagged other matches that open their filepart.

    A match tagged ``real`` with no other match before it inside its
    filepart is presumably title text rather than a Real/Proper flag
    (NOTE(review): inferred from the removal condition — confirm against
    the rule's test cases), so it is removed.
    """
    consequence = RemoveMatch
    priority = 64

    def when(self, matches, context):
        invalid = []
        for filepart in matches.markers.named('path'):
            candidates = matches.range(filepart.start, filepart.end,
                                       lambda m: m.name == 'other' and 'real' in m.tags)
            for candidate in candidates:
                # Nothing precedes the candidate within this filepart:
                # reject it.
                if not matches.range(filepart.start, candidate.start):
                    invalid.append(candidate)
        return invalid

View file

@ -8,7 +8,7 @@ from rebulk.remodule import re
from rebulk import Rebulk
from ..common import dash
from ..common.pattern import is_disabled
from ..common.validators import seps_surround, int_coercable, compose
from ..common.validators import seps_surround, int_coercable, and_
from ..common.numeral import numeral, parse_numeral
from ...reutils import build_or_pattern
@ -41,6 +41,6 @@ def part(config): # pylint:disable=unused-argument
rebulk.regex(build_or_pattern(prefixes) + r'-?(?P<part>' + numeral + r')',
prefixes=prefixes, validate_all=True, private_parent=True, children=True, formatter=parse_numeral,
validator={'part': compose(validate_roman, lambda m: 0 < m.value < 100)})
validator={'part': and_(validate_roman, lambda m: 0 < m.value < 100)})
return rebulk

View file

@ -9,8 +9,8 @@ from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch
from rebulk.match import Match
from ..common import seps
from ..common.expected import build_expected_function
from ..common.comparators import marker_sorted
from ..common.expected import build_expected_function
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import int_coercable, seps_surround
@ -50,7 +50,7 @@ def release_group(config):
if string.lower().endswith(forbidden) and string[-len(forbidden) - 1:-len(forbidden)] in seps:
string = string[:len(forbidden)]
string = string.strip(groupname_seps)
return string
return string.strip()
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'release_group'))
@ -72,7 +72,9 @@ _scene_previous_names = ('video_codec', 'source', 'video_api', 'audio_codec', 'a
'audio_channels', 'screen_size', 'other', 'container', 'language', 'subtitle_language',
'subtitle_language.suffix', 'subtitle_language.prefix', 'language.suffix')
_scene_previous_tags = ('release-group-prefix', )
_scene_previous_tags = ('release-group-prefix',)
_scene_no_previous_tags = ('no-release-group-prefix',)
class DashSeparatedReleaseGroup(Rule):
@ -193,7 +195,8 @@ class DashSeparatedReleaseGroup(Rule):
if releasegroup.value:
to_append.append(releasegroup)
return to_remove, to_append
if to_remove or to_append:
return to_remove, to_append
class SceneReleaseGroup(Rule):
@ -212,6 +215,17 @@ class SceneReleaseGroup(Rule):
super(SceneReleaseGroup, self).__init__()
self.value_formatter = value_formatter
@staticmethod
def is_previous_match(match):
"""
Check if match can precede release_group
:param match:
:return:
"""
return not match.tagged(*_scene_no_previous_tags) if match.name in _scene_previous_names else \
match.tagged(*_scene_previous_tags)
def when(self, matches, context): # pylint:disable=too-many-locals
# If a release_group is found before, ignore this kind of release_group rule.
@ -253,13 +267,12 @@ class SceneReleaseGroup(Rule):
if match.start < filepart.start:
return False
return not match.private or match.name in _scene_previous_names
return not match.private or self.is_previous_match(match)
previous_match = matches.previous(last_hole,
previous_match_filter,
index=0)
if previous_match and (previous_match.name in _scene_previous_names or
any(tag in previous_match.tags for tag in _scene_previous_tags)) and \
if previous_match and (self.is_previous_match(previous_match)) and \
not matches.input_string[previous_match.end:last_hole.start].strip(seps) \
and not int_coercable(last_hole.value.strip(seps)):
@ -300,11 +313,11 @@ class AnimeReleaseGroup(Rule):
# If a release_group is found before, ignore this kind of release_group rule.
if matches.named('release_group'):
return to_remove, to_append
return False
if not matches.named('episode') and not matches.named('season') and matches.named('release_group'):
# This doesn't seems to be an anime, and we already found another release_group.
return to_remove, to_append
return False
for filepart in marker_sorted(matches.markers.named('path'), matches):
@ -328,4 +341,7 @@ class AnimeReleaseGroup(Rule):
to_append.append(group)
to_remove.extend(matches.range(empty_group.start, empty_group.end,
lambda m: 'weak-language' in m.tags))
return to_remove, to_append
if to_remove or to_append:
return to_remove, to_append
return False

View file

@ -24,8 +24,8 @@ def screen_size(config):
:return: Created Rebulk object
:rtype: Rebulk
"""
interlaced = frozenset({res for res in config['interlaced']})
progressive = frozenset({res for res in config['progressive']})
interlaced = frozenset(config['interlaced'])
progressive = frozenset(config['progressive'])
frame_rates = [re.escape(rate) for rate in config['frame_rates']]
min_ar = config['min_ar']
max_ar = config['max_ar']

View file

@ -12,7 +12,7 @@ from rebulk import AppendMatch, Rebulk, RemoveMatch, Rule
from .audio_codec import HqConflictRule
from ..common import dash, seps
from ..common.pattern import is_disabled
from ..common.validators import seps_before, seps_after
from ..common.validators import seps_before, seps_after, or_
def source(config): # pylint:disable=unused-argument
@ -26,7 +26,10 @@ def source(config): # pylint:disable=unused-argument
"""
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'source'))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash], private_parent=True, children=True)
rebulk.defaults(name='source', tags=['video-codec-prefix', 'streaming_service.suffix'])
rebulk = rebulk.defaults(name='source',
tags=['video-codec-prefix', 'streaming_service.suffix'],
validate_all=True,
validator={'__parent__': or_(seps_before, seps_after)})
rip_prefix = '(?P<other>Rip)-?'
rip_suffix = '-?(?P<other>Rip)'
@ -42,7 +45,7 @@ def source(config): # pylint:disable=unused-argument
def demote_other(match, other): # pylint: disable=unused-argument
"""Default conflict solver with 'other' property."""
return other if other.name == 'other' else '__default__'
return other if other.name == 'other' or other.name == 'release_group' else '__default__'
rebulk.regex(*build_source_pattern('VHS', suffix=rip_optional_suffix),
value={'source': 'VHS', 'other': 'Rip'})
@ -92,8 +95,9 @@ def source(config): # pylint:disable=unused-argument
# WEBCap is a synonym to WEBRip, mostly used by non english
rebulk.regex(*build_source_pattern('WEB-?(?P<another>Cap)', suffix=rip_optional_suffix),
value={'source': 'Web', 'other': 'Rip', 'another': 'Rip'})
rebulk.regex(*build_source_pattern('WEB-?DL', 'WEB-?U?HD', 'WEB', 'DL-?WEB', 'DL(?=-?Mux)'),
rebulk.regex(*build_source_pattern('WEB-?DL', 'WEB-?U?HD', 'DL-?WEB', 'DL(?=-?Mux)'),
value={'source': 'Web'})
rebulk.regex('(WEB)', value='Web', tags='weak.source')
rebulk.regex(*build_source_pattern('HD-?DVD', suffix=rip_optional_suffix),
value={'source': 'HD-DVD', 'other': 'Rip'})
@ -118,7 +122,7 @@ def source(config): # pylint:disable=unused-argument
rebulk.regex(*build_source_pattern('DSR?', 'SAT', suffix=rip_suffix),
value={'source': 'Satellite', 'other': 'Rip'})
rebulk.rules(ValidateSource, UltraHdBlurayRule)
rebulk.rules(ValidateSourcePrefixSuffix, ValidateWeakSource, UltraHdBlurayRule)
return rebulk
@ -170,32 +174,62 @@ class UltraHdBlurayRule(Rule):
to_remove.append(match)
to_append.append(new_source)
return to_remove, to_append
if to_remove or to_append:
return to_remove, to_append
return False
class ValidateSource(Rule):
class ValidateSourcePrefixSuffix(Rule):
"""
Validate source with screener property, with video_codec property or separated
Validate source with source prefix, source suffix.
"""
priority = 64
consequence = RemoveMatch
def when(self, matches, context):
ret = []
for match in matches.named('source'):
match = match.initiator
if not seps_before(match) and \
not matches.range(match.start - 1, match.start - 2,
lambda m: 'source-prefix' in m.tags):
if match.children:
ret.extend(match.children)
ret.append(match)
continue
if not seps_after(match) and \
not matches.range(match.end, match.end + 1,
lambda m: 'source-suffix' in m.tags):
if match.children:
ret.extend(match.children)
ret.append(match)
continue
for filepart in matches.markers.named('path'):
for match in matches.range(filepart.start, filepart.end, predicate=lambda m: m.name == 'source'):
match = match.initiator
if not seps_before(match) and \
not matches.range(match.start - 1, match.start - 2,
lambda m: 'source-prefix' in m.tags):
if match.children:
ret.extend(match.children)
ret.append(match)
continue
if not seps_after(match) and \
not matches.range(match.end, match.end + 1,
lambda m: 'source-suffix' in m.tags):
if match.children:
ret.extend(match.children)
ret.append(match)
continue
return ret
class ValidateWeakSource(Rule):
    """
    Validate weak source matches.

    Removes a ``weak.source``-tagged source match when another source match
    follows it in the same filepart and non-separator text precedes it —
    in that situation the weak match is presumably part of the title
    (NOTE(review): inferred from the removal condition — confirm with the
    fixture cases).
    """
    # Run only after prefix/suffix validation has pruned sources.
    dependency = [ValidateSourcePrefixSuffix]
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        ret = []
        for filepart in matches.markers.named('path'):
            for match in matches.range(filepart.start, filepart.end, predicate=lambda m: m.name == 'source'):
                # A weak source with another source after it in this filepart,
                # and with a non-empty hole (title text) before it, is dropped.
                if 'weak.source' in match.tags \
                        and matches.range(match.end, filepart.end, predicate=lambda m: m.name == 'source') \
                        and matches.holes(filepart.start, match.start,
                                          predicate=lambda m: m.value.strip(seps), index=-1):
                    # Remove the children too so no partial match survives.
                    if match.children:
                        ret.extend(match.children)
                    ret.append(match)
                    continue
        return ret

View file

@ -25,133 +25,13 @@ def streaming_service(config): # pylint: disable=too-many-statements,unused-arg
rebulk = rebulk.string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
rebulk.defaults(name='streaming_service', tags=['source-prefix'])
rebulk.string('AE', 'A&E', value='A&E')
rebulk.string('AMBC', value='ABC')
rebulk.string('AUBC', value='ABC Australia')
rebulk.string('AJAZ', value='Al Jazeera English')
rebulk.string('AMC', value='AMC')
rebulk.string('AMZN', 'Amazon', value='Amazon Prime')
rebulk.regex('Amazon-?Prime', value='Amazon Prime')
rebulk.string('AS', value='Adult Swim')
rebulk.regex('Adult-?Swim', value='Adult Swim')
rebulk.string('ATK', value="America's Test Kitchen")
rebulk.string('ANPL', value='Animal Planet')
rebulk.string('ANLB', value='AnimeLab')
rebulk.string('AOL', value='AOL')
rebulk.string('ARD', value='ARD')
rebulk.string('iP', value='BBC iPlayer')
rebulk.regex('BBC-?iPlayer', value='BBC iPlayer')
rebulk.string('BRAV', value='BravoTV')
rebulk.string('CNLP', value='Canal+')
rebulk.string('CN', value='Cartoon Network')
rebulk.string('CBC', value='CBC')
rebulk.string('CBS', value='CBS')
rebulk.string('CNBC', value='CNBC')
rebulk.string('CC', value='Comedy Central')
rebulk.string('4OD', value='Channel 4')
rebulk.string('CHGD', value='CHRGD')
rebulk.string('CMAX', value='Cinemax')
rebulk.string('CMT', value='Country Music Television')
rebulk.regex('Comedy-?Central', value='Comedy Central')
rebulk.string('CCGC', value='Comedians in Cars Getting Coffee')
rebulk.string('CR', value='Crunchy Roll')
rebulk.string('CRKL', value='Crackle')
rebulk.regex('Crunchy-?Roll', value='Crunchy Roll')
rebulk.string('CSPN', value='CSpan')
rebulk.string('CTV', value='CTV')
rebulk.string('CUR', value='CuriosityStream')
rebulk.string('CWS', value='CWSeed')
rebulk.string('DSKI', value='Daisuki')
rebulk.string('DHF', value='Deadhouse Films')
rebulk.string('DDY', value='Digiturk Diledigin Yerde')
rebulk.string('DISC', 'Discovery', value='Discovery')
rebulk.string('DSNY', 'Disney', value='Disney')
rebulk.string('DIY', value='DIY Network')
rebulk.string('DOCC', value='Doc Club')
rebulk.string('DPLY', value='DPlay')
rebulk.string('ETV', value='E!')
rebulk.string('EPIX', value='ePix')
rebulk.string('ETTV', value='El Trece')
rebulk.string('ESPN', value='ESPN')
rebulk.string('ESQ', value='Esquire')
rebulk.string('FAM', value='Family')
rebulk.string('FJR', value='Family Jr')
rebulk.string('FOOD', value='Food Network')
rebulk.string('FOX', value='Fox')
rebulk.string('FREE', value='Freeform')
rebulk.string('FYI', value='FYI Network')
rebulk.string('GLBL', value='Global')
rebulk.string('GLOB', value='GloboSat Play')
rebulk.string('HLMK', value='Hallmark')
rebulk.string('HBO', value='HBO Go')
rebulk.regex('HBO-?Go', value='HBO Go')
rebulk.string('HGTV', value='HGTV')
rebulk.string('HIST', 'History', value='History')
rebulk.string('HULU', value='Hulu')
rebulk.string('ID', value='Investigation Discovery')
rebulk.string('IFC', value='IFC')
rebulk.string('iTunes', 'iT', value='iTunes')
rebulk.string('ITV', value='ITV')
rebulk.string('KNOW', value='Knowledge Network')
rebulk.string('LIFE', value='Lifetime')
rebulk.string('MTOD', value='Motor Trend OnDemand')
rebulk.string('MNBC', value='MSNBC')
rebulk.string('MTV', value='MTV')
rebulk.string('NATG', value='National Geographic')
rebulk.regex('National-?Geographic', value='National Geographic')
rebulk.string('NBA', value='NBA TV')
rebulk.regex('NBA-?TV', value='NBA TV')
rebulk.string('NBC', value='NBC')
rebulk.string('NF', 'Netflix', value='Netflix')
rebulk.string('NFL', value='NFL')
rebulk.string('NFLN', value='NFL Now')
rebulk.string('GC', value='NHL GameCenter')
rebulk.string('NICK', 'Nickelodeon', value='Nickelodeon')
rebulk.string('NRK', value='Norsk Rikskringkasting')
rebulk.string('PBS', value='PBS')
rebulk.string('PBSK', value='PBS Kids')
rebulk.string('PSN', value='Playstation Network')
rebulk.string('PLUZ', value='Pluzz')
rebulk.string('RTE', value='RTE One')
rebulk.string('SBS', value='SBS (AU)')
rebulk.string('SESO', 'SeeSo', value='SeeSo')
rebulk.string('SHMI', value='Shomi')
rebulk.string('SPIK', value='Spike')
rebulk.string('SPKE', value='Spike TV')
rebulk.regex('Spike-?TV', value='Spike TV')
rebulk.string('SNET', value='Sportsnet')
rebulk.string('SPRT', value='Sprout')
rebulk.string('STAN', value='Stan')
rebulk.string('STZ', value='Starz')
rebulk.string('SVT', value='Sveriges Television')
rebulk.string('SWER', value='SwearNet')
rebulk.string('SYFY', value='Syfy')
rebulk.string('TBS', value='TBS')
rebulk.string('TFOU', value='TFou')
rebulk.string('CW', value='The CW')
rebulk.regex('The-?CW', value='The CW')
rebulk.string('TLC', value='TLC')
rebulk.string('TUBI', value='TubiTV')
rebulk.string('TV3', value='TV3 Ireland')
rebulk.string('TV4', value='TV4 Sweeden')
rebulk.string('TVL', value='TV Land')
rebulk.regex('TV-?Land', value='TV Land')
rebulk.string('UFC', value='UFC')
rebulk.string('UKTV', value='UKTV')
rebulk.string('UNIV', value='Univision')
rebulk.string('USAN', value='USA Network')
rebulk.string('VLCT', value='Velocity')
rebulk.string('VH1', value='VH1')
rebulk.string('VICE', value='Viceland')
rebulk.string('VMEO', value='Vimeo')
rebulk.string('VRV', value='VRV')
rebulk.string('WNET', value='W Network')
rebulk.string('WME', value='WatchMe')
rebulk.string('WWEN', value='WWE Network')
rebulk.string('XBOX', value='Xbox Video')
rebulk.string('YHOO', value='Yahoo')
rebulk.string('RED', value='YouTube Red')
rebulk.string('ZDF', value='ZDF')
for value, items in config.items():
patterns = items if isinstance(items, list) else [items]
for pattern in patterns:
if pattern.startswith('re:'):
rebulk.regex(pattern, value=value)
else:
rebulk.string(pattern, value=value)
rebulk.rules(ValidateStreamingService)
@ -161,7 +41,7 @@ def streaming_service(config): # pylint: disable=too-many-statements,unused-arg
class ValidateStreamingService(Rule):
"""Validate streaming service matches."""
priority = 32
priority = 128
consequence = RemoveMatch
def when(self, matches, context):

View file

@ -8,7 +8,12 @@ from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, AppendTags
from rebulk.formatters import formatters
from .film import FilmTitleRule
from .language import SubtitlePrefixLanguageRule, SubtitleSuffixLanguageRule, SubtitleExtensionRule
from .language import (
SubtitlePrefixLanguageRule,
SubtitleSuffixLanguageRule,
SubtitleExtensionRule,
NON_SPECIFIC_LANGUAGES
)
from ..common import seps, title_seps
from ..common.comparators import marker_sorted
from ..common.expected import build_expected_function
@ -88,12 +93,19 @@ class TitleBaseRule(Rule):
:rtype:
"""
cropped_holes = []
group_markers = matches.markers.named('group')
for group_marker in group_markers:
path_marker = matches.markers.at_match(group_marker, predicate=lambda m: m.name == 'path', index=0)
if path_marker and path_marker.span == group_marker.span:
group_markers.remove(group_marker)
for hole in holes:
group_markers = matches.markers.named('group')
cropped_holes.extend(hole.crop(group_markers))
return cropped_holes
def is_ignored(self, match):
@staticmethod
def is_ignored(match):
"""
Ignore matches when scanning for title (hole).
@ -130,7 +142,8 @@ class TitleBaseRule(Rule):
for outside in outside_matches:
other_languages.extend(matches.range(outside.start, outside.end,
lambda c_match: c_match.name == match.name and
c_match not in to_keep))
c_match not in to_keep and
c_match.value not in NON_SPECIFIC_LANGUAGES))
if not other_languages and (not starting or len(match.raw) <= 3):
return True
@ -239,7 +252,7 @@ class TitleBaseRule(Rule):
to_remove = []
if matches.named(self.match_name, lambda match: 'expected' in match.tags):
return ret, to_remove
return False
fileparts = [filepart for filepart in list(marker_sorted(matches.markers.named('path'), matches))
if not self.filepart_filter or self.filepart_filter(filepart, matches)]
@ -272,7 +285,9 @@ class TitleBaseRule(Rule):
ret.extend(titles)
to_remove.extend(to_remove_c)
return ret, to_remove
if ret or to_remove:
return ret, to_remove
return False
class TitleFromPosition(TitleBaseRule):
@ -329,4 +344,6 @@ class PreferTitleWithYear(Rule):
for title_match in titles:
if title_match.value not in title_values:
to_remove.append(title_match)
return to_remove, to_tag
if to_remove or to_tag:
return to_remove, to_tag
return False

View file

@ -3,9 +3,8 @@
"""
video_codec and video_profile property
"""
from rebulk.remodule import re
from rebulk import Rebulk, Rule, RemoveMatch
from rebulk.remodule import re
from ..common import dash
from ..common.pattern import is_disabled
@ -43,7 +42,8 @@ def video_codec(config): # pylint:disable=unused-argument
# http://blog.mediacoderhq.com/h264-profiles-and-levels/
# https://en.wikipedia.org/wiki/H.264/MPEG-4_AVC
rebulk.defaults(name="video_profile",
rebulk.defaults(clear=True,
name="video_profile",
validator=seps_surround,
disabled=lambda context: is_disabled(context, 'video_profile'))
@ -66,7 +66,8 @@ def video_codec(config): # pylint:disable=unused-argument
rebulk.string('DXVA', value='DXVA', name='video_api',
disabled=lambda context: is_disabled(context, 'video_api'))
rebulk.defaults(name='color_depth',
rebulk.defaults(clear=True,
name='color_depth',
validator=seps_surround,
disabled=lambda context: is_disabled(context, 'color_depth'))
rebulk.regex('12.?bits?', value='12-bit')

View file

@ -67,7 +67,7 @@ def website(config):
"""
Validator for next website matches
"""
return any(name in ['season', 'episode', 'year'] for name in match.names)
return match.named('season', 'episode', 'year')
def when(self, matches, context):
to_remove = []
@ -80,7 +80,9 @@ def website(config):
if not safe:
suffix = matches.next(website_match, PreferTitleOverWebsite.valid_followers, 0)
if suffix:
to_remove.append(website_match)
group = matches.markers.at_match(website_match, lambda marker: marker.name == 'group', 0)
if not group:
to_remove.append(website_match)
return to_remove
rebulk.rules(PreferTitleOverWebsite, ValidateWebsitePrefix)

View file

@ -35,9 +35,9 @@
-cd: 1
-cd_count: 3
? This.Is.Us
? This.is.Us
: options: --exclude country
title: This Is Us
title: This is Us
-country: US
? 2015.01.31
@ -286,9 +286,9 @@
: options: --exclude website
-website: wawa.co.uk
? movie.mkv
? movie.mp4
: options: --exclude mimetype
-mimetype: video/x-matroska
-mimetype: video/mp4
? another movie.mkv
: options: --exclude container

View file

@ -201,9 +201,9 @@
? Series/My Name Is Earl/My.Name.Is.Earl.S01Extras.-.Bad.Karma.DVDRip.XviD.avi
: title: My Name Is Earl
season: 1
episode_title: Extras - Bad Karma
episode_title: Bad Karma
source: DVD
other: Rip
other: [Extras, Rip]
video_codec: Xvid
? series/Freaks And Geeks/Season 1/Episode 4 - Kim Kelly Is My Friend-eng(1).srt
@ -1917,9 +1917,11 @@
? Duck.Dynasty.S02E07.Streik.German.DOKU.DL.WS.DVDRiP.x264-CDP
: episode: 7
episode_title: Streik German
episode_title: Streik
source: DVD
language: mul
language:
- German
- Multi
other: [Documentary, Widescreen, Rip]
release_group: CDP
season: 2
@ -1930,9 +1932,11 @@
? Family.Guy.S13E14.JOLO.German.AC3D.DL.720p.WebHD.x264-CDD
: audio_codec: Dolby Digital
episode: 14
episode_title: JOLO German
episode_title: JOLO
source: Web
language: mul
language:
- German
- Multi
release_group: CDD
screen_size: 720p
season: 13
@ -3025,7 +3029,7 @@
title: Show Name
episode: [493, 494, 495, 496, 497, 498, 500, 501, 502, 503, 504, 505, 506, 507]
screen_size: 720p
subtitle_language: fr
other: Variable Frame Rate
video_codec: H.264
audio_codec: AAC
type: episode
@ -4524,4 +4528,166 @@
video_codec: H.264
audio_codec: MP2
release_group: KIDKAT
type: episode
? Por Trece Razones - Temporada 2 [HDTV 720p][Cap.201][AC3 5.1 Castellano]/Por Trece Razones 2x01 [des202].mkv
: title: Por Trece Razones
season: 2
source: HDTV
screen_size: 720p
episode: 1
audio_codec: Dolby Digital
audio_channels: '5.1'
language: Catalan
release_group: des202
container: mkv
type: episode
? Cuerpo de Elite - Temporada 1 [HDTV 720p][Cap.113][AC3 5.1 Esp Castellano]\CuerpoDeElite720p_113_desca202.mkv
: title: Cuerpo de Elite
season: 1
source: HDTV
screen_size: 720p
episode: 13
audio_codec: Dolby Digital
audio_channels: '5.1'
language:
- Spanish
- Catalan
container: mkv
type: episode
? Show.Name.S01E01.St.Patricks.Day.1080p.mkv
: title: Show Name
season: 1
episode: 1
episode_title: St Patricks Day
screen_size: 1080p
container: mkv
type: episode
? Show.Name.S01E01.St.Patricks.Day.1080p-grp.mkv
: title: Show Name
season: 1
episode: 1
episode_title: St Patricks Day
screen_size: 1080p
release_group: grp
container: mkv
type: episode
? Titans.2018.S01E09.Hank.And.Dawn.720p.DCU.WEB-DL.AAC2.0.H264-NTb
: title: Titans
year: 2018
season: 1
episode: 9
episode_title: Hank And Dawn
screen_size: 720p
streaming_service: DC Universe
source: Web
audio_codec: AAC
audio_channels: '2.0'
video_codec: H.264
release_group: NTb
type: episode
? S.W.A.T.2017.S01E21.Treibjagd.German.Dubbed.DL.AmazonHD.x264-TVS
: title: S.W.A.T.
year: 2017
season: 1
episode: 21
episode_title: Treibjagd
language:
- German
- Multi
streaming_service: Amazon Prime
other: HD
video_codec: H.264
release_group: TVS
type: episode
? S.W.A.T.2017.S01E16.READNFO.720p.HDTV.x264-KILLERS
: title: S.W.A.T.
year: 2017
season: 1
episode: 16
other: Read NFO
screen_size: 720p
source: HDTV
video_codec: H.264
release_group: KILLERS
type: episode
? /mnt/NAS/NoSubsTVShows/Babylon 5/Season 01/Ep. 02 - Soul Hunter
: title: Babylon 5
season: 1
episode: 2
episode_title: Soul Hunter
type: episode
? This.is.Us.S01E01.HDTV.x264-KILLERS.mkv
: title: This is Us
season: 1
episode: 1
source: HDTV
video_codec: H.264
release_group: KILLERS
container: mkv
type: episode
? Videos/Office1080/The Office (US) (2005) Season 2 S02 + Extras (1080p AMZN WEB-DL x265 HEVC 10bit AAC 2.0 LION)/The Office (US) (2005) - S02E12 - The Injury (1080p AMZN WEB-DL x265 LION).mkv
: title: The Office
country: US
year: 2005
season: 2
other: Extras
screen_size: 1080p
streaming_service: Amazon Prime
source: Web
video_codec: H.265
video_profile: High Efficiency Video Coding
color_depth: 10-bit
audio_codec: AAC
audio_channels: '2.0'
release_group: LION
episode: 12
episode_title: The Injury
container: mkv
type: episode
? Thumping.Spike.2.E01.DF.WEBRip.720p-DRAMATV.mp4
: title: Thumping Spike 2
episode: 1
source: Web
other: Rip
screen_size: 720p
streaming_service: DramaFever
release_group: DRAMATV
container: mp4
mimetype: video/mp4
type: episode
? About.Time.E01.1080p.VIKI.WEB-DL-BLUEBERRY.mp4
: title: About Time
episode: 1
screen_size: 1080p
streaming_service: Viki
source: Web
release_group: BLUEBERRY
container: mp4
mimetype: video/mp4
type: episode
? Eyes.Of.Dawn.1991.E01.480p.MBCVOD.AAC.x264-NOGPR.mp4
: title: Eyes Of Dawn
year: 1991
season: 1991
episode: 1
screen_size: 480p
streaming_service: MBC
audio_codec: AAC
video_codec: H.264
release_group: NOGPR
container: mp4
mimetype: video/mp4
type: episode

View file

@ -815,10 +815,12 @@
? Das.Appartement.German.AC3D.DL.720p.BluRay.x264-TVP
: audio_codec: Dolby Digital
source: Blu-ray
language: mul
language:
- German
- Multi
release_group: TVP
screen_size: 720p
title: Das Appartement German
title: Das Appartement
type: movie
video_codec: H.264
@ -1723,7 +1725,7 @@
? Ant-Man.and.the.Wasp.2018.Digital.Extras.1080p.AMZN.WEB-DL.DDP5.1.H.264-NTG.mkv
: title: Ant-Man and the Wasp
year: 2018
alternative_title: Digital Extras
other: Extras
screen_size: 1080p
streaming_service: Amazon Prime
source: Web
@ -1770,4 +1772,15 @@
audio_channels: '5.1'
video_codec: H.264
release_group: CMRG
type: movie
type: movie
? The.Girl.in.the.Spiders.Web.2019.1080p.WEB-DL.x264.AC3-EVO.mkv
: title: The Girl in the Spiders Web
year: 2019
screen_size: 1080p
source: Web
video_codec: H.264
audio_codec: Dolby Digital
release_group: EVO
container: mkv
type: movie

View file

@ -0,0 +1,467 @@
? is
: title: is
? it
: title: it
? am
: title: am
? mad
: title: mad
? men
: title: men
? man
: title: man
? run
: title: run
? sin
: title: sin
? st
: title: st
? to
: title: to
? 'no'
: title: 'no'
? non
: title: non
? war
: title: war
? min
: title: min
? new
: title: new
? car
: title: car
? day
: title: day
? bad
: title: bad
? bat
: title: bat
? fan
: title: fan
? fry
: title: fry
? cop
: title: cop
? zen
: title: zen
? gay
: title: gay
? fat
: title: fat
? one
: title: one
? cherokee
: title: cherokee
? got
: title: got
? an
: title: an
? as
: title: as
? cat
: title: cat
? her
: title: her
? be
: title: be
? hat
: title: hat
? sun
: title: sun
? may
: title: may
? my
: title: my
? mr
: title: mr
? rum
: title: rum
? pi
: title: pi
? bb
: title: bb
? bt
: title: bt
? tv
: title: tv
? aw
: title: aw
? by
: title: by
? md
: other: Mic Dubbed
? mp
: title: mp
? cd
: title: cd
? in
: title: in
? ad
: title: ad
? ice
: title: ice
? ay
: title: ay
? at
: title: at
? star
: title: star
? so
: title: so
? he
: title: he
? do
: title: do
? ax
: title: ax
? mx
: title: mx
? bas
: title: bas
? de
: title: de
? le
: title: le
? son
: title: son
? ne
: title: ne
? ca
: title: ca
? ce
: title: ce
? et
: title: et
? que
: title: que
? mal
: title: mal
? est
: title: est
? vol
: title: vol
? or
: title: or
? mon
: title: mon
? se
: title: se
? je
: title: je
? tu
: title: tu
? me
: title: me
? ma
: title: ma
? va
: title: va
? au
: country: AU
? lu
: title: lu
? wa
: title: wa
? ga
: title: ga
? ao
: title: ao
? la
: title: la
? el
: title: el
? del
: title: del
? por
: title: por
? mar
: title: mar
? al
: title: al
? un
: title: un
? ind
: title: ind
? arw
: title: arw
? ts
: source: Telesync
? ii
: title: ii
? bin
: title: bin
? chan
: title: chan
? ss
: title: ss
? san
: title: san
? oss
: title: oss
? iii
: title: iii
? vi
: title: vi
? ben
: title: ben
? da
: title: da
? lt
: title: lt
? ch
: title: ch
? sr
: title: sr
? ps
: title: ps
? cx
: title: cx
? vo
: title: vo
? mkv
: container: mkv
? avi
: container: avi
? dmd
: title: dmd
? the
: title: the
? dis
: title: dis
? cut
: title: cut
? stv
: title: stv
? des
: title: des
? dia
: title: dia
? and
: title: and
? cab
: title: cab
? sub
: title: sub
? mia
: title: mia
? rim
: title: rim
? las
: title: las
? une
: title: une
? par
: title: par
? srt
: container: srt
? ano
: title: ano
? toy
: title: toy
? job
: title: job
? gag
: title: gag
? reel
: title: reel
? www
: title: www
? for
: title: for
? ayu
: title: ayu
? csi
: title: csi
? ren
: title: ren
? moi
: title: moi
? sur
: title: sur
? fer
: title: fer
? fun
: title: fun
? two
: title: two
? big
: title: big
? psy
: title: psy
? air
: title: air
? brazil
: title: brazil
? jordan
: title: jordan
? bs
: title: bs
? kz
: title: kz
? gt
: title: gt
? im
: title: im
? pt
: language: pt
? scr
: title: scr
? sd
: title: sd
? hr
: other: High Resolution

View file

@ -5,8 +5,8 @@
: country: US
title: this is title
? This.is.us.title
: title: This is us title
? This.is.Us
: title: This is Us
? This.Is.Us
: options: --no-default-config

View file

@ -48,7 +48,7 @@
proper_count: 3
? Proper
? Proper.720p
? +Repack
? +Rerip
: other: Proper
@ -80,7 +80,7 @@
? Remux
: other: Remux
? 3D
? 3D.2019
: other: 3D
? HD

View file

@ -0,0 +1,21 @@
{
"titles": [
"13 Reasons Why",
"Star Wars: Episode VII - The Force Awakens",
"3%",
"The 100",
"3 Percent",
"This is Us",
"Open Season 2",
"Game of Thrones",
"The X-Files",
"11.22.63"
],
"suggested": [
"13 Reasons Why",
"Star Wars: Episode VII - The Force Awakens",
"The 100",
"Open Season 2",
"11.22.63"
]
}

View file

@ -1,13 +1,14 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name, pointless-string-statement
import json
import os
import sys
import pytest
import six
from ..api import guessit, properties, GuessitException
from ..api import guessit, properties, suggested_expected, GuessitException
__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
@ -27,12 +28,16 @@ def test_forced_binary():
assert ret and 'title' in ret and isinstance(ret['title'], six.binary_type)
@pytest.mark.skipif('sys.version_info < (3, 4)', reason="Path is not available")
@pytest.mark.skipif(sys.version_info < (3, 4), reason="Path is not available")
def test_pathlike_object():
from pathlib import Path
path = Path('Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv')
ret = guessit(path)
assert ret and 'title' in ret
try:
from pathlib import Path
path = Path('Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv')
ret = guessit(path)
assert ret and 'title' in ret
except ImportError: # pragma: no-cover
pass
def test_unicode_japanese():
@ -69,3 +74,10 @@ def test_exception():
assert "An internal error has occured in guessit" in str(excinfo.value)
assert "Guessit Exception Report" in str(excinfo.value)
assert "Please report at https://github.com/guessit-io/guessit/issues" in str(excinfo.value)
def test_suggested_expected():
with open(os.path.join(__location__, 'suggested.json'), 'r') as f:
content = json.load(f)
actual = suggested_expected(content['titles'])
assert actual == content['suggested']

View file

@ -7,9 +7,8 @@ import os
from io import open # pylint: disable=redefined-builtin
import babelfish
import pytest
import six
import yaml
import six # pylint:disable=wrong-import-order
import yaml # pylint:disable=wrong-import-order
from rebulk.remodule import re
from rebulk.utils import is_iterable
@ -21,13 +20,6 @@ logger = logging.getLogger(__name__)
__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
filename_predicate = None
string_predicate = None
# filename_predicate = lambda filename: 'episode_title' in filename
# string_predicate = lambda string: '-DVD.BlablaBla.Fix.Blablabla.XVID' in string
class EntryResult(object):
def __init__(self, string, negates=False):
@ -134,7 +126,49 @@ class TestYml(object):
options_re = re.compile(r'^([ +-]+)(.*)')
files, ids = files_and_ids(filename_predicate)
def _get_unique_id(self, collection, base_id):
ret = base_id
i = 2
while ret in collection:
suffix = "-" + str(i)
ret = base_id + suffix
i += 1
return ret
def pytest_generate_tests(self, metafunc):
if 'yml_test_case' in metafunc.fixturenames:
entries = []
entry_ids = []
entry_set = set()
for filename, _ in zip(*files_and_ids()):
with open(os.path.join(__location__, filename), 'r', encoding='utf-8') as infile:
data = yaml.load(infile, OrderedDictYAMLLoader)
last_expected = None
for string, expected in reversed(list(data.items())):
if expected is None:
data[string] = last_expected
else:
last_expected = expected
default = None
try:
default = data['__default__']
del data['__default__']
except KeyError:
pass
for string, expected in data.items():
TestYml.set_default(expected, default)
string = TestYml.fix_encoding(string, expected)
entries.append((filename, string, expected))
unique_id = self._get_unique_id(entry_set, '[' + filename + '] ' + str(string))
entry_set.add(unique_id)
entry_ids.append(unique_id)
metafunc.parametrize('yml_test_case', entries, ids=entry_ids)
@staticmethod
def set_default(expected, default):
@ -143,34 +177,8 @@ class TestYml(object):
if k not in expected:
expected[k] = v
@pytest.mark.parametrize('filename', files, ids=ids)
def test(self, filename, caplog):
caplog.set_level(logging.INFO)
with open(os.path.join(__location__, filename), 'r', encoding='utf-8') as infile:
data = yaml.load(infile, OrderedDictYAMLLoader)
entries = Results()
last_expected = None
for string, expected in reversed(list(data.items())):
if expected is None:
data[string] = last_expected
else:
last_expected = expected
default = None
try:
default = data['__default__']
del data['__default__']
except KeyError:
pass
for string, expected in data.items():
TestYml.set_default(expected, default)
entry = self.check_data(filename, string, expected)
entries.append(entry)
entries.assert_ok()
def check_data(self, filename, string, expected):
@classmethod
def fix_encoding(cls, string, expected):
if six.PY2:
if isinstance(string, six.text_type):
string = string.encode('utf-8')
@ -183,16 +191,23 @@ class TestYml(object):
expected[k] = v
if not isinstance(string, str):
string = str(string)
if not string_predicate or string_predicate(string): # pylint: disable=not-callable
entry = self.check(string, expected)
if entry.ok:
logger.debug('[%s] %s', filename, entry)
elif entry.warning:
logger.warning('[%s] %s', filename, entry)
elif entry.error:
logger.error('[%s] %s', filename, entry)
for line in entry.details:
logger.error('[%s] %s', filename, ' ' * 4 + line)
return string
def test_entry(self, yml_test_case):
filename, string, expected = yml_test_case
result = self.check_data(filename, string, expected)
assert not result.error
def check_data(self, filename, string, expected):
entry = self.check(string, expected)
if entry.ok:
logger.debug('[%s] %s', filename, entry)
elif entry.warning:
logger.warning('[%s] %s', filename, entry)
elif entry.error:
logger.error('[%s] %s', filename, entry)
for line in entry.details:
logger.error('[%s] %s', filename, ' ' * 4 + line)
return entry
def check(self, string, expected):

View file

@ -946,3 +946,254 @@
source: Blu-ray
audio_codec: DTS-HD
type: movie
? Mr Robot - S03E01 - eps3 0 power-saver-mode h (1080p AMZN WEB-DL x265 HEVC 10bit EAC3 6.0 RCVR).mkv
: title: Mr Robot
season: 3
episode: 1
episode_title: eps3 0 power-saver-mode h
screen_size: 1080p
streaming_service: Amazon Prime
source: Web
video_codec: H.265
video_profile: High Efficiency Video Coding
color_depth: 10-bit
audio_codec: Dolby Digital Plus
audio_channels: '5.1'
release_group: RCVR
container: mkv
type: episode
? Panorama.15-05-2018.Web-DL.540p.H264.AAC.Subs.mp4
: title: Panorama
date: 2018-05-15
source: Web
screen_size: 540p
video_codec: H.264
audio_codec: AAC
subtitle_language: und
container: mp4
type: episode
? Shaolin 2011.720p.BluRay.x264-x0r.mkv
: title: Shaolin
year: 2011
screen_size: 720p
source: Blu-ray
video_codec: H.264
release_group: x0r
container: mkv
type: movie
? '[ Engineering Catastrophes S02E10 1080p AMZN WEB-DL DD+ 2.0 x264-TrollHD ]'
: title: Engineering Catastrophes
season: 2
episode: 10
screen_size: 1080p
streaming_service: Amazon Prime
source: Web
audio_codec: Dolby Digital Plus
audio_channels: '2.0'
video_codec: H.264
release_group: TrollHD
type: episode
? A Very Harold & Kumar 3D Christmas (2011).mkv
: title: A Very Harold & Kumar 3D Christmas
year: 2011
container: mkv
type: movie
? Cleveland.Hustles.S01E03.Downward.Dogs.and.Proper.Pigs.720p.HDTV.x264-W4F
: title: Cleveland Hustles
season: 1
episode: 3
episode_title: Downward Dogs and Proper Pigs
screen_size: 720p
source: HDTV
video_codec: H.264
release_group: W4F
type: episode
? Pawn.Stars.S12E20.The.Pawn.Awakens.REAL.READ.NFO.720p.HDTV.x264-DHD
: title: Pawn Stars
season: 12
episode: 20
episode_title: The Pawn Awakens
other:
- Proper
- Read NFO
proper_count: 2
screen_size: 720p
source: HDTV
video_codec: H.264
release_group: DHD
type: episode
? Pawn.Stars.S12E22.Racing.Revolution.REAL.720p.HDTV.x264-DHD
: title: Pawn Stars
season: 12
episode: 22
episode_title: Racing Revolution
other: Proper
proper_count: 2
screen_size: 720p
source: HDTV
video_codec: H.264
release_group: DHD
type: episode
? Luksusfellen.S18E02.REAL.NORWEGiAN.720p.WEB.h264-NORPiLT
: title: Luksusfellen
season: 18
episode: 2
other: Proper
proper_count: 2
language: Norwegian
screen_size: 720p
source: Web
video_codec: H.264
release_group: NORPiLT
type: episode
? The.Exorcist.S02E07.REAL.FRENCH.720p.HDTV.x264-SH0W
: title: The Exorcist
season: 2
episode: 7
other: Proper
proper_count: 2
language: fr
screen_size: 720p
source: HDTV
video_codec: H.264
release_group: SH0W
type: episode
? Outrageous.Acts.of.Science.S05E02.Is.This.for.Real.720p.HDTV.x264-DHD
: title: Outrageous Acts of Science
season: 5
episode: 2
# corner case
# episode_title: Is This for Real
screen_size: 720p
source: HDTV
video_codec: H.264
release_group: DHD
type: episode
? How.the.Universe.Works.S06E08.Strange.Lives.of.Dwarf.Planets.REAL.720p.WEB.x264-DHD
: title: How the Universe Works
season: 6
episode: 8
episode_title: Strange Lives of Dwarf Planets
other: Proper
proper_count: 2
screen_size: 720p
source: Web
video_codec: H.264
release_group: DHD
type: episode
? Vampirina.S01E16.REAL.HDTV.x264-W4F
: title: Vampirina
season: 1
episode: 16
other: Proper
proper_count: 2
source: HDTV
video_codec: H.264
release_group: W4F
type: episode
? Test.S01E16.Some Real Episode Title.HDTV.x264-W4F
: title: Test
season: 1
episode: 16
episode_title: Some Real Episode Title
source: HDTV
video_codec: H.264
release_group: W4F
type: episode
? NOS4A2.S01E01.The.Shorter.Way.REPACK.720p.AMZN.WEB-DL.DDP5.1.H.264-NTG.mkv
: title: NOS4A2
season: 1
episode: 1
episode_title: The Shorter Way
other: Proper
proper_count: 1
screen_size: 720p
streaming_service: Amazon Prime
source: Web
audio_codec: Dolby Digital Plus
audio_channels: '5.1'
video_codec: H.264
release_group: NTG
container: mkv
type: episode
? Star Trek DS9 Ep 2x03 The Siege (Part III)
: title: Star Trek DS9
season: 2
episode: 3
episode_title: The Siege
part: 3
type: episode
? The.Red.Line.S01E01
: title: The Red Line
season: 1
episode: 1
type: episode
? Show.S01E01.WEB.x264-METCON.mkv
: title: Show
season: 1
episode: 1
source: Web
video_codec: H.264
release_group: METCON
container: mkv
type: episode
? Show.S01E01.WEB.x264-TCMEON.mkv
: title: Show
season: 1
episode: 1
source: Web
video_codec: H.264
release_group: TCMEON
container: mkv
type: episode
? Show.S01E01.WEB.x264-MEONTC.mkv
: title: Show
season: 1
episode: 1
source: Web
video_codec: H.264
release_group: MEONTC
container: mkv
type: episode
? '[TorrentCouch.com].Westworld.S02.Complete.720p.WEB-DL.x264.[MP4].[5.3GB].[Season.2.Full]/[TorrentCouch.com].Westworld.S02E03.720p.WEB-DL.x264.mp4'
: website: TorrentCouch.com
title: Westworld
season: 2
other: Complete
screen_size: 720p
source: Web
video_codec: H.264
container: mp4
size: 5.3GB
episode: 3
type: episode
? Vita.&.Virginia.2018.720p.H.264.YTS.LT.mp4
: title: Vita & Virginia
year: 2018
screen_size: 720p
video_codec: H.264
release_group: YTS.LT
container: mp4
type: movie

View file

@ -10,19 +10,19 @@ except ImportError: # pragma: no-cover
from ordereddict import OrderedDict # pylint:disable=import-error
import babelfish
import yaml
import yaml # pylint:disable=wrong-import-order
from .rules.common.quantity import BitRate, FrameRate, Size
class OrderedDictYAMLLoader(yaml.Loader):
class OrderedDictYAMLLoader(yaml.SafeLoader):
"""
A YAML loader that loads mappings into ordered dictionaries.
From https://gist.github.com/enaeseth/844388
"""
def __init__(self, *args, **kwargs):
yaml.Loader.__init__(self, *args, **kwargs)
yaml.SafeLoader.__init__(self, *args, **kwargs)
self.add_constructor(u'tag:yaml.org,2002:map', type(self).construct_yaml_map)
self.add_constructor(u'tag:yaml.org,2002:omap', type(self).construct_yaml_map)
@ -58,7 +58,7 @@ class CustomDumper(yaml.SafeDumper):
"""
Custom YAML Dumper.
"""
pass
pass # pylint:disable=unnecessary-pass
def default_representer(dumper, data):