Update vendored guessit to 3.1.1

Updates python-dateutil to 2.8.2
Updates rebulk to 2.0.1
This commit is contained in:
Labrys of Knossos 2022-11-28 19:44:46 -05:00
commit 2226a74ef8
66 changed files with 2995 additions and 1306 deletions

View file

@ -25,7 +25,7 @@ def _potential_before(i, input_string):
:return:
:rtype: bool
"""
return i - 2 >= 0 and input_string[i] in seps and input_string[i - 2] in seps and input_string[i - 1] not in seps
return i - 1 >= 0 and input_string[i] in seps and input_string[i - 2] in seps and input_string[i - 1] not in seps
def _potential_after(i, input_string):

View file

@ -28,7 +28,7 @@ def int_coercable(string):
return False
def compose(*validators):
def and_(*validators):
"""
Compose validators functions
:param validators:
@ -49,3 +49,26 @@ def compose(*validators):
return False
return True
return composed
def or_(*validators):
    """
    Build a validator that succeeds as soon as one of the given validators succeeds.

    :param validators: validator callables, each called with the value to check
    :type validators:
    :return: the combined validator function
    :rtype:
    """
    def composed(string):
        """
        Run the wrapped validators in order and stop at the first truthy result.

        :param string: value handed to each validator
        :type string:
        :return: True if at least one validator accepted the value, False otherwise
        :rtype: bool
        """
        return any(validator(string) for validator in validators)
    return composed

View file

@ -0,0 +1,20 @@
"""
Match processors
"""
from guessit.rules.common import seps
def strip(match, chars=seps):
    """
    Strip given characters from both ends of a match, in place.

    :param match: match whose ``start``/``end`` bounds are tightened
    :param chars: characters to strip
    :return: ``False`` when nothing is left of the match after stripping, ``None`` otherwise
    """
    input_string = match.input_string
    # Bound each scan by the opposite end of the match: a match made only of
    # strippable characters must collapse to an empty span rather than walk
    # outside its own bounds (or past the end of the input string).
    while match.start < match.end and input_string[match.start] in chars:
        match.start += 1
    while match.end > match.start and input_string[match.end - 1] in chars:
        match.end -= 1
    if not match:
        return False
    return None

View file

@ -34,7 +34,9 @@ class EnlargeGroupMatches(CustomRule):
for match in matches.ending(group.end - 1):
ending.append(match)
return starting, ending
if starting or ending:
return starting, ending
return False
def then(self, matches, when_response, context):
starting, ending = when_response

View file

@ -3,9 +3,8 @@
"""
audio_codec, audio_profile and audio_channels property
"""
from rebulk.remodule import re
from rebulk import Rebulk, Rule, RemoveMatch
from rebulk.remodule import re
from ..common import dash
from ..common.pattern import is_disabled
@ -23,7 +22,9 @@ def audio_codec(config): # pylint:disable=unused-argument
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
rebulk = Rebulk()\
.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])\
.string_defaults(ignore_case=True)
def audio_codec_priority(match1, match2):
"""
@ -61,7 +62,9 @@ def audio_codec(config): # pylint:disable=unused-argument
rebulk.string('PCM', value='PCM')
rebulk.string('LPCM', value='LPCM')
rebulk.defaults(name='audio_profile', disabled=lambda context: is_disabled(context, 'audio_profile'))
rebulk.defaults(clear=True,
name='audio_profile',
disabled=lambda context: is_disabled(context, 'audio_profile'))
rebulk.string('MA', value='Master Audio', tags=['audio_profile.rule', 'DTS-HD'])
rebulk.string('HR', 'HRA', value='High Resolution Audio', tags=['audio_profile.rule', 'DTS-HD'])
rebulk.string('ES', value='Extended Surround', tags=['audio_profile.rule', 'DTS'])
@ -70,17 +73,19 @@ def audio_codec(config): # pylint:disable=unused-argument
rebulk.string('HQ', value='High Quality', tags=['audio_profile.rule', 'Dolby Digital'])
rebulk.string('EX', value='EX', tags=['audio_profile.rule', 'Dolby Digital'])
rebulk.defaults(name="audio_channels", disabled=lambda context: is_disabled(context, 'audio_channels'))
rebulk.regex(r'(7[\W_][01](?:ch)?)(?=[^\d]|$)', value='7.1', children=True)
rebulk.regex(r'(5[\W_][01](?:ch)?)(?=[^\d]|$)', value='5.1', children=True)
rebulk.regex(r'(2[\W_]0(?:ch)?)(?=[^\d]|$)', value='2.0', children=True)
rebulk.defaults(clear=True,
name="audio_channels",
disabled=lambda context: is_disabled(context, 'audio_channels'))
rebulk.regex('7[01]', value='7.1', validator=seps_after, tags='weak-audio_channels')
rebulk.regex('5[01]', value='5.1', validator=seps_after, tags='weak-audio_channels')
rebulk.string('20', value='2.0', validator=seps_after, tags='weak-audio_channels')
rebulk.string('7ch', '8ch', value='7.1')
rebulk.string('5ch', '6ch', value='5.1')
rebulk.string('2ch', 'stereo', value='2.0')
rebulk.string('1ch', 'mono', value='1.0')
for value, items in config.get('audio_channels').items():
for item in items:
if item.startswith('re:'):
rebulk.regex(item[3:], value=value, children=True)
else:
rebulk.string(item, value=value)
rebulk.rules(DtsHDRule, DtsRule, AacRule, DolbyDigitalRule, AudioValidatorRule, HqConflictRule,
AudioChannelsValidatorRule)

View file

@ -69,4 +69,6 @@ class BitRateTypeRule(Rule):
else:
to_rename.append(match)
return to_rename, to_remove
if to_rename or to_remove:
return to_rename, to_remove
return False

View file

@ -26,7 +26,8 @@ def bonus(config): # pylint:disable=unused-argument
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
rebulk.regex(r'x(\d+)', name='bonus', private_parent=True, children=True, formatter=int,
validator={'__parent__': lambda match: seps_surround},
validator={'__parent__': seps_surround},
validate_all=True,
conflict_solver=lambda match, conflicting: match
if conflicting.name in ('video_codec', 'episode') and 'weak-episode' not in conflicting.tags
else '__default__')

View file

@ -44,7 +44,8 @@ def container(config):
rebulk.regex(r'\.'+build_or_pattern(torrent)+'$', exts=torrent, tags=['extension', 'torrent'])
rebulk.regex(r'\.'+build_or_pattern(nzb)+'$', exts=nzb, tags=['extension', 'nzb'])
rebulk.defaults(name='container',
rebulk.defaults(clear=True,
name='container',
validator=seps_surround,
formatter=lambda s: s.lower(),
conflict_solver=lambda match, other: match

View file

@ -10,6 +10,7 @@ from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, RenameMatch, POST_PRO
from ..common import seps, title_seps
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import or_
from ..properties.title import TitleFromPosition, TitleBaseRule
from ..properties.type import TypeProcessor
@ -133,8 +134,7 @@ class EpisodeTitleFromPosition(TitleBaseRule):
def hole_filter(self, hole, matches):
episode = matches.previous(hole,
lambda previous: any(name in previous.names
for name in self.previous_names),
lambda previous: previous.named(*self.previous_names),
0)
crc32 = matches.named('crc32')
@ -179,8 +179,7 @@ class AlternativeTitleReplace(Rule):
predicate=lambda match: 'title' in match.tags, index=0)
if main_title:
episode = matches.previous(main_title,
lambda previous: any(name in previous.names
for name in self.previous_names),
lambda previous: previous.named(*self.previous_names),
0)
crc32 = matches.named('crc32')
@ -249,7 +248,7 @@ class Filepart3EpisodeTitle(Rule):
if season:
hole = matches.holes(subdirectory.start, subdirectory.end,
ignore=lambda match: 'weak-episode' in match.tags,
ignore=or_(lambda match: 'weak-episode' in match.tags, TitleBaseRule.is_ignored),
formatter=cleanup, seps=title_seps, predicate=lambda match: match.value,
index=0)
if hole:
@ -292,7 +291,8 @@ class Filepart2EpisodeTitle(Rule):
season = (matches.range(directory.start, directory.end, lambda match: match.name == 'season', 0) or
matches.range(filename.start, filename.end, lambda match: match.name == 'season', 0))
if season:
hole = matches.holes(directory.start, directory.end, ignore=lambda match: 'weak-episode' in match.tags,
hole = matches.holes(directory.start, directory.end,
ignore=or_(lambda match: 'weak-episode' in match.tags, TitleBaseRule.is_ignored),
formatter=cleanup, seps=title_seps,
predicate=lambda match: match.value, index=0)
if hole:

View file

@ -11,12 +11,13 @@ from rebulk.match import Match
from rebulk.remodule import re
from rebulk.utils import is_iterable
from guessit.rules import match_processors
from guessit.rules.common.numeral import parse_numeral, numeral
from .title import TitleFromPosition
from ..common import dash, alt_dash, seps, seps_no_fs
from ..common.formatters import strip
from ..common.numeral import numeral, parse_numeral
from ..common.pattern import is_disabled
from ..common.validators import compose, seps_surround, seps_before, int_coercable
from ..common.validators import seps_surround, int_coercable, and_
from ...reutils import build_or_pattern
@ -29,17 +30,12 @@ def episodes(config):
:return: Created Rebulk object
:rtype: Rebulk
"""
# pylint: disable=too-many-branches,too-many-statements,too-many-locals
def is_season_episode_disabled(context):
"""Whether season and episode rules should be enabled."""
return is_disabled(context, 'episode') or is_disabled(context, 'season')
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'])
episode_max_range = config['episode_max_range']
season_max_range = config['season_max_range']
def episodes_season_chain_breaker(matches):
"""
Break chains if there's more than 100 offset between two neighbor values.
@ -57,8 +53,6 @@ def episodes(config):
return True
return False
rebulk.chain_defaults(chain_breaker=episodes_season_chain_breaker)
def season_episode_conflict_solver(match, other):
"""
Conflict solver for episode/season patterns
@ -76,7 +70,6 @@ def episodes(config):
if (other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags
and not match.initiator.children.named(match.name + 'Marker')) or (
other.name == 'screen_size' and not int_coercable(other.raw)):
return match
if other.name in ('season', 'episode') and match.initiator != other.initiator:
if (match.initiator.name in ('weak_episode', 'weak_duplicate')
@ -87,21 +80,6 @@ def episodes(config):
return current
return '__default__'
season_words = config['season_words']
episode_words = config['episode_words']
of_words = config['of_words']
all_words = config['all_words']
season_markers = config['season_markers']
season_ep_markers = config['season_ep_markers']
disc_markers = config['disc_markers']
episode_markers = config['episode_markers']
range_separators = config['range_separators']
weak_discrete_separators = list(sep for sep in seps_no_fs if sep not in range_separators)
strong_discrete_separators = config['discrete_separators']
discrete_separators = strong_discrete_separators + weak_discrete_separators
max_range_gap = config['max_range_gap']
def ordering_validator(match):
"""
Validator for season list. They should be in natural order to be validated.
@ -135,65 +113,18 @@ def episodes(config):
lambda m: m.name == property_name + 'Separator')
separator = match.children.previous(current_match,
lambda m: m.name == property_name + 'Separator', 0)
if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
if not 0 < current_match.value - previous_match.value <= max_range_gap + 1:
valid = False
if separator.raw in strong_discrete_separators:
valid = True
break
if separator:
if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
if not 0 < current_match.value - previous_match.value <= max_range_gap + 1:
valid = False
if separator.raw in strong_discrete_separators:
valid = True
break
previous_match = current_match
return valid
return is_consecutive('episode') and is_consecutive('season')
# S01E02, 01x02, S01S02S03
rebulk.chain(formatter={'season': int, 'episode': int},
tags=['SxxExx'],
abbreviations=[alt_dash],
children=True,
private_parent=True,
validate_all=True,
validator={'__parent__': ordering_validator},
conflict_solver=season_episode_conflict_solver,
disabled=is_season_episode_disabled) \
.regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)@?' +
build_or_pattern(episode_markers + disc_markers, name='episodeMarker') + r'@?(?P<episode>\d+)',
validate_all=True,
validator={'__parent__': seps_before}).repeater('+') \
.regex(build_or_pattern(episode_markers + disc_markers + discrete_separators + range_separators,
name='episodeSeparator',
escape=True) +
r'(?P<episode>\d+)').repeater('*') \
.chain() \
.regex(r'(?P<season>\d+)@?' +
build_or_pattern(season_ep_markers, name='episodeMarker') +
r'@?(?P<episode>\d+)',
validate_all=True,
validator={'__parent__': seps_before}) \
.chain() \
.regex(r'(?P<season>\d+)@?' +
build_or_pattern(season_ep_markers, name='episodeMarker') +
r'@?(?P<episode>\d+)',
validate_all=True,
validator={'__parent__': seps_before}) \
.regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
name='episodeSeparator',
escape=True) +
r'(?P<episode>\d+)').repeater('*') \
.chain() \
.regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)',
validate_all=True,
validator={'__parent__': seps_before}) \
.regex(build_or_pattern(season_markers + discrete_separators + range_separators,
name='seasonSeparator',
escape=True) +
r'(?P<season>\d+)').repeater('*')
# episode_details property
for episode_detail in ('Special', 'Pilot', 'Unaired', 'Final'):
rebulk.string(episode_detail, value=episode_detail, name='episode_details',
disabled=lambda context: is_disabled(context, 'episode_details'))
def validate_roman(match):
"""
Validate a roman match if surrounded by separators
@ -206,117 +137,203 @@ def episodes(config):
return True
return seps_surround(match)
season_words = config['season_words']
episode_words = config['episode_words']
of_words = config['of_words']
all_words = config['all_words']
season_markers = config['season_markers']
season_ep_markers = config['season_ep_markers']
disc_markers = config['disc_markers']
episode_markers = config['episode_markers']
range_separators = config['range_separators']
weak_discrete_separators = list(sep for sep in seps_no_fs if sep not in range_separators)
strong_discrete_separators = config['discrete_separators']
discrete_separators = strong_discrete_separators + weak_discrete_separators
episode_max_range = config['episode_max_range']
season_max_range = config['season_max_range']
max_range_gap = config['max_range_gap']
rebulk = Rebulk() \
.regex_defaults(flags=re.IGNORECASE) \
.string_defaults(ignore_case=True) \
.chain_defaults(chain_breaker=episodes_season_chain_breaker) \
.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
formatter={'season': int, 'episode': int, 'version': int, 'count': int},
children=True,
private_parent=True,
conflict_solver=season_episode_conflict_solver,
abbreviations=[alt_dash])
# S01E02, 01x02, S01S02S03
rebulk.chain(
tags=['SxxExx'],
validate_all=True,
validator={'__parent__': and_(seps_surround, ordering_validator)},
disabled=is_season_episode_disabled) \
.defaults(tags=['SxxExx']) \
.regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)@?' +
build_or_pattern(episode_markers + disc_markers, name='episodeMarker') + r'@?(?P<episode>\d+)')\
.repeater('+') \
.regex(build_or_pattern(episode_markers + disc_markers + discrete_separators + range_separators,
name='episodeSeparator',
escape=True) +
r'(?P<episode>\d+)').repeater('*')
rebulk.chain(tags=['SxxExx'],
validate_all=True,
validator={'__parent__': and_(seps_surround, ordering_validator)},
disabled=is_season_episode_disabled) \
.defaults(tags=['SxxExx']) \
.regex(r'(?P<season>\d+)@?' +
build_or_pattern(season_ep_markers, name='episodeMarker') +
r'@?(?P<episode>\d+)').repeater('+') \
rebulk.chain(tags=['SxxExx'],
validate_all=True,
validator={'__parent__': and_(seps_surround, ordering_validator)},
disabled=is_season_episode_disabled) \
.defaults(tags=['SxxExx']) \
.regex(r'(?P<season>\d+)@?' +
build_or_pattern(season_ep_markers, name='episodeMarker') +
r'@?(?P<episode>\d+)') \
.regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
name='episodeSeparator',
escape=True) +
r'(?P<episode>\d+)').repeater('*')
rebulk.chain(tags=['SxxExx'],
validate_all=True,
validator={'__parent__': and_(seps_surround, ordering_validator)},
disabled=is_season_episode_disabled) \
.defaults(tags=['SxxExx']) \
.regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)') \
.regex('(?P<other>Extras)', name='other', value='Extras', tags=['no-release-group-prefix']).repeater('?') \
.regex(build_or_pattern(season_markers + discrete_separators + range_separators,
name='seasonSeparator',
escape=True) +
r'(?P<season>\d+)').repeater('*')
# episode_details property
for episode_detail in ('Special', 'Pilot', 'Unaired', 'Final'):
rebulk.string(episode_detail,
private_parent=False,
children=False,
value=episode_detail,
name='episode_details',
disabled=lambda context: is_disabled(context, 'episode_details'))
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True,
validate_all=True,
validator={'__parent__': and_(seps_surround, ordering_validator)},
children=True,
private_parent=True,
conflict_solver=season_episode_conflict_solver)
rebulk.chain(abbreviations=[alt_dash],
rebulk.chain(validate_all=True,
conflict_solver=season_episode_conflict_solver,
formatter={'season': parse_numeral, 'count': parse_numeral},
validator={'__parent__': compose(seps_surround, ordering_validator),
validator={'__parent__': and_(seps_surround, ordering_validator),
'season': validate_roman,
'count': validate_roman},
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'season')) \
.defaults(validator=None) \
.defaults(formatter={'season': parse_numeral, 'count': parse_numeral},
validator={'season': validate_roman, 'count': validate_roman},
conflict_solver=season_episode_conflict_solver) \
.regex(build_or_pattern(season_words, name='seasonMarker') + '@?(?P<season>' + numeral + ')') \
.regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
.regex(r'@?' + build_or_pattern(range_separators + discrete_separators + ['@'],
name='seasonSeparator', escape=True) +
r'@?(?P<season>\d+)').repeater('*')
rebulk.defaults(abbreviations=[dash])
rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>\d+)' +
r'(?:v(?P<version>\d+))?' +
r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?', # Episode 4
abbreviations=[dash], formatter={'episode': int, 'version': int, 'count': int},
disabled=lambda context: context.get('type') == 'episode' or is_disabled(context, 'episode'))
rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>' + numeral + ')' +
r'(?:v(?P<version>\d+))?' +
r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?', # Episode 4
abbreviations=[dash],
validator={'episode': validate_roman},
formatter={'episode': parse_numeral, 'version': int, 'count': int},
formatter={'episode': parse_numeral},
disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode'))
rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')',
tags=['SxxExx'],
abbreviations=[dash],
validator=None,
formatter={'season': int, 'other': lambda match: 'Complete'},
formatter={'other': lambda match: 'Complete'},
disabled=lambda context: is_disabled(context, 'season'))
# 12, 13
rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int},
rebulk.chain(tags=['weak-episode'],
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
.defaults(validator=None) \
.defaults(validator=None, tags=['weak-episode']) \
.regex(r'(?P<episode>\d{2})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*')
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})', abbreviations=None).repeater('*')
# 012, 013
rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int},
rebulk.chain(tags=['weak-episode'],
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
.defaults(validator=None) \
.defaults(validator=None, tags=['weak-episode']) \
.regex(r'0(?P<episode>\d{1,2})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})').repeater('*')
.regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})', abbreviations=None).repeater('*')
# 112, 113
rebulk.chain(tags=['weak-episode'],
formatter={'episode': int, 'version': int},
name='weak_episode',
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
.defaults(validator=None) \
.defaults(validator=None, tags=['weak-episode'], name='weak_episode') \
.regex(r'(?P<episode>\d{3,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})').repeater('*')
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})', abbreviations=None).repeater('*')
# 1, 2, 3
rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int},
rebulk.chain(tags=['weak-episode'],
disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode')) \
.defaults(validator=None) \
.defaults(validator=None, tags=['weak-episode']) \
.regex(r'(?P<episode>\d)') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})').repeater('*')
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})', abbreviations=None).repeater('*')
# e112, e113, 1e18, 3e19
# TODO: Enhance rebulk for validator to be used globally (season_episode_validator)
rebulk.chain(formatter={'season': int, 'episode': int, 'version': int},
disabled=lambda context: is_disabled(context, 'episode')) \
rebulk.chain(disabled=lambda context: is_disabled(context, 'episode')) \
.defaults(validator=None) \
.regex(r'(?P<season>\d{1,2})?(?P<episodeMarker>e)(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})', abbreviations=None).repeater('*')
# ep 112, ep113, ep112, ep113
rebulk.chain(abbreviations=[dash], formatter={'episode': int, 'version': int},
disabled=lambda context: is_disabled(context, 'episode')) \
rebulk.chain(disabled=lambda context: is_disabled(context, 'episode')) \
.defaults(validator=None) \
.regex(r'ep-?(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})').repeater('*')
.regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})', abbreviations=None).repeater('*')
# cap 112, cap 112_114
rebulk.chain(abbreviations=[dash],
tags=['see-pattern'],
formatter={'season': int, 'episode': int},
rebulk.chain(tags=['see-pattern'],
disabled=is_season_episode_disabled) \
.defaults(validator=None) \
.defaults(validator=None, tags=['see-pattern']) \
.regex(r'(?P<seasonMarker>cap)-?(?P<season>\d{1,2})(?P<episode>\d{2})') \
.regex(r'(?P<episodeSeparator>-)(?P<season>\d{1,2})(?P<episode>\d{2})').repeater('?')
# 102, 0102
rebulk.chain(tags=['weak-episode', 'weak-duplicate'],
formatter={'season': int, 'episode': int, 'version': int},
name='weak_duplicate',
conflict_solver=season_episode_conflict_solver,
disabled=lambda context: (context.get('episode_prefer_number', False) or
context.get('type') == 'movie') or is_season_episode_disabled(context)) \
.defaults(validator=None) \
.defaults(tags=['weak-episode', 'weak-duplicate'],
name='weak_duplicate',
validator=None,
conflict_solver=season_episode_conflict_solver) \
.regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})').repeater('*')
.regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})', abbreviations=None).repeater('*')
rebulk.regex(r'v(?P<version>\d+)', children=True, private_parent=True, formatter=int,
rebulk.regex(r'v(?P<version>\d+)',
formatter=int,
disabled=lambda context: is_disabled(context, 'version'))
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])
@ -325,18 +342,23 @@ def episodes(config):
# detached of X count (season/episode)
rebulk.regex(r'(?P<episode>\d+)-?' + build_or_pattern(of_words) +
r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
abbreviations=[dash], children=True, private_parent=True, formatter=int,
formatter=int,
pre_match_processor=match_processors.strip,
disabled=lambda context: is_disabled(context, 'episode'))
rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode",
rebulk.regex(r'Minisodes?',
children=False,
private_parent=False,
name='episode_format',
value="Minisode",
disabled=lambda context: is_disabled(context, 'episode_format'))
rebulk.rules(WeakConflictSolver, RemoveInvalidSeason, RemoveInvalidEpisode,
SeePatternRange(range_separators + ['_']),
EpisodeNumberSeparatorRange(range_separators),
SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx,
RemoveWeakDuplicate, EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator,
RemoveWeak, RenameToAbsoluteEpisode, CountValidator, EpisodeSingleDigitValidator, RenameToDiscMatch)
SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx, RemoveWeakDuplicate,
EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator, RemoveWeak(episode_words),
RenameToAbsoluteEpisode, CountValidator, EpisodeSingleDigitValidator, RenameToDiscMatch)
return rebulk
@ -416,7 +438,9 @@ class WeakConflictSolver(Rule):
if to_append:
to_remove.extend(weak_dup_matches)
return to_remove, to_append
if to_remove or to_append:
return to_remove, to_append
return False
class CountValidator(Rule):
@ -442,7 +466,9 @@ class CountValidator(Rule):
season_count.append(count)
else:
to_remove.append(count)
return to_remove, episode_count, season_count
if to_remove or episode_count or season_count:
return to_remove, episode_count, season_count
return False
class SeePatternRange(Rule):
@ -477,7 +503,9 @@ class SeePatternRange(Rule):
to_remove.append(separator)
return to_remove, to_append
if to_remove or to_append:
return to_remove, to_append
return False
class AbstractSeparatorRange(Rule):
@ -533,7 +561,9 @@ class AbstractSeparatorRange(Rule):
previous_match = next_match
return to_remove, to_append
if to_remove or to_append:
return to_remove, to_append
return False
class RenameToAbsoluteEpisode(Rule):
@ -629,20 +659,41 @@ class RemoveWeak(Rule):
Remove weak-episode matches which appears after video, source, and audio matches.
"""
priority = 16
consequence = RemoveMatch
consequence = RemoveMatch, AppendMatch
def __init__(self, episode_words):
super(RemoveWeak, self).__init__()
self.episode_words = episode_words
def when(self, matches, context):
to_remove = []
to_append = []
for filepart in matches.markers.named('path'):
weaks = matches.range(filepart.start, filepart.end, predicate=lambda m: 'weak-episode' in m.tags)
if weaks:
previous = matches.previous(weaks[0], predicate=lambda m: m.name in (
weak = weaks[0]
previous = matches.previous(weak, predicate=lambda m: m.name in (
'audio_codec', 'screen_size', 'streaming_service', 'source', 'video_profile',
'audio_channels', 'audio_profile'), index=0)
if previous and not matches.holes(
previous.end, weaks[0].start, predicate=lambda m: m.raw.strip(seps)):
previous.end, weak.start, predicate=lambda m: m.raw.strip(seps)):
if previous.raw.lower() in self.episode_words:
try:
episode = copy.copy(weak)
episode.name = 'episode'
episode.value = int(weak.value)
episode.start = previous.start
episode.private = False
episode.tags = []
to_append.append(episode)
except ValueError:
pass
to_remove.extend(weaks)
return to_remove
if to_remove or to_append:
return to_remove, to_append
return False
class RemoveWeakIfSxxExx(Rule):
@ -856,4 +907,6 @@ class RenameToDiscMatch(Rule):
markers.append(marker)
discs.extend(sorted(marker.initiator.children.named('episode'), key=lambda m: m.value))
return discs, markers, to_remove
if discs or markers or to_remove:
return discs, markers, to_remove
return False

View file

@ -72,6 +72,8 @@ def language(config, common_words):
UNDETERMINED = babelfish.Language('und')
MULTIPLE = babelfish.Language('mul')
NON_SPECIFIC_LANGUAGES = frozenset([UNDETERMINED, MULTIPLE])
class GuessitConverter(babelfish.LanguageReverseConverter): # pylint: disable=missing-docstring
@ -388,7 +390,9 @@ class SubtitlePrefixLanguageRule(Rule):
to_remove.extend(matches.conflicting(lang))
if prefix in to_remove:
to_remove.remove(prefix)
return to_rename, to_remove
if to_rename or to_remove:
return to_rename, to_remove
return False
def then(self, matches, when_response, context):
to_rename, to_remove = when_response
@ -425,7 +429,9 @@ class SubtitleSuffixLanguageRule(Rule):
to_append.append(lang)
if suffix in to_remove:
to_remove.remove(suffix)
return to_append, to_remove
if to_append or to_remove:
return to_append, to_remove
return False
def then(self, matches, when_response, context):
to_rename, to_remove = when_response
@ -478,6 +484,7 @@ class RemoveInvalidLanguages(Rule):
"""Remove language matches that matches the blacklisted common words."""
consequence = RemoveMatch
priority = 32
def __init__(self, common_words):
"""Constructor."""

View file

@ -11,7 +11,7 @@ from rebulk.remodule import re
from ..common import dash
from ..common import seps
from ..common.pattern import is_disabled
from ..common.validators import seps_after, seps_before, seps_surround, compose
from ..common.validators import seps_after, seps_before, seps_surround, and_
from ...reutils import build_or_pattern
from ...rules.common.formatters import raw_cleanup
@ -35,11 +35,16 @@ def other(config): # pylint:disable=unused-argument,too-many-statements
rebulk.regex('ws', 'wide-?screen', value='Widescreen')
rebulk.regex('Re-?Enc(?:oded)?', value='Reencoded')
rebulk.string('Proper', 'Repack', 'Rerip', value='Proper',
rebulk.string('Repack', 'Rerip', value='Proper',
tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.string('Proper', value='Proper',
tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix'])
rebulk.regex('Real-Proper', 'Real-Repack', 'Real-Rerip', value='Proper',
tags=['streaming_service.prefix', 'streaming_service.suffix', 'real'])
rebulk.regex('Real', value='Proper',
tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix', 'real'])
rebulk.string('Fix', 'Fixed', value='Fix', tags=['has-neighbor-before', 'has-neighbor-after',
'streaming_service.prefix', 'streaming_service.suffix'])
rebulk.string('Dirfix', 'Nfofix', 'Prooffix', value='Fix',
@ -72,16 +77,18 @@ def other(config): # pylint:disable=unused-argument,too-many-statements
private_names=['completeArticle', 'completeWordsBefore', 'completeWordsAfter'],
value={'other': 'Complete'},
tags=['release-group-prefix'],
validator={'__parent__': compose(seps_surround, validate_complete)})
validator={'__parent__': and_(seps_surround, validate_complete)})
rebulk.string('R5', value='Region 5')
rebulk.string('RC', value='Region C')
rebulk.regex('Pre-?Air', value='Preair')
rebulk.regex('(?:PS-?)?Vita', value='PS Vita')
rebulk.regex('(?:PS-?)Vita', value='PS Vita')
rebulk.regex('Vita', value='PS Vita', tags='has-neighbor')
rebulk.regex('(HD)(?P<another>Rip)', value={'other': 'HD', 'another': 'Rip'},
private_parent=True, children=True, validator={'__parent__': seps_surround}, validate_all=True)
for value in ('Screener', 'Remux', '3D', 'PAL', 'SECAM', 'NTSC', 'XXX'):
for value in ('Screener', 'Remux', 'PAL', 'SECAM', 'NTSC', 'XXX'):
rebulk.string(value, value=value)
rebulk.string('3D', value='3D', tags='has-neighbor')
rebulk.string('HQ', value='High Quality', tags='uhdbluray-neighbor')
rebulk.string('HR', value='High Resolution')
@ -90,6 +97,7 @@ def other(config): # pylint:disable=unused-argument,too-many-statements
rebulk.string('mHD', 'HDLight', value='Micro HD')
rebulk.string('LDTV', value='Low Definition')
rebulk.string('HFR', value='High Frame Rate')
rebulk.string('VFR', value='Variable Frame Rate')
rebulk.string('HD', value='HD', validator=None,
tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.regex('Full-?HD', 'FHD', value='Full HD', validator=None,
@ -128,13 +136,15 @@ def other(config): # pylint:disable=unused-argument,too-many-statements
rebulk.regex('BT-?2020', value='BT.2020', tags='uhdbluray-neighbor')
rebulk.string('Sample', value='Sample', tags=['at-end', 'not-a-release-group'])
rebulk.string('Extras', value='Extras', tags='has-neighbor')
rebulk.regex('Digital-?Extras?', value='Extras')
rebulk.string('Proof', value='Proof', tags=['at-end', 'not-a-release-group'])
rebulk.string('Obfuscated', 'Scrambled', value='Obfuscated', tags=['at-end', 'not-a-release-group'])
rebulk.string('xpost', 'postbot', 'asrequested', value='Repost', tags='not-a-release-group')
rebulk.rules(RenameAnotherToOther, ValidateHasNeighbor, ValidateHasNeighborAfter, ValidateHasNeighborBefore,
ValidateScreenerRule, ValidateMuxRule, ValidateHardcodedSubs, ValidateStreamingServiceNeighbor,
ValidateAtEnd, ProperCountRule)
ValidateAtEnd, ValidateReal, ProperCountRule)
return rebulk
@ -354,3 +364,20 @@ class ValidateAtEnd(Rule):
to_remove.append(match)
return to_remove
class ValidateReal(Rule):
    """
    Validate 'other' matches tagged 'real'.

    Such a match is selected for removal (consequence is RemoveMatch) when no match
    is found between the start of its path filepart and the start of the match itself.
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        """
        Collect the 'real'-tagged 'other' matches that nothing precedes in their filepart.

        :param matches: matches container to inspect
        :param context: rule context (not used here)
        :return: list of matches to hand to the consequence
        """
        def tagged_real(candidate):
            return candidate.name == 'other' and 'real' in candidate.tags

        to_remove = []
        for filepart in matches.markers.named('path'):
            for real_match in matches.range(filepart.start, filepart.end, tagged_real):
                if matches.range(filepart.start, real_match.start):
                    # Another match sits before this one in the same filepart: keep it.
                    continue
                to_remove.append(real_match)
        return to_remove

View file

@ -8,7 +8,7 @@ from rebulk.remodule import re
from rebulk import Rebulk
from ..common import dash
from ..common.pattern import is_disabled
from ..common.validators import seps_surround, int_coercable, compose
from ..common.validators import seps_surround, int_coercable, and_
from ..common.numeral import numeral, parse_numeral
from ...reutils import build_or_pattern
@ -41,6 +41,6 @@ def part(config): # pylint:disable=unused-argument
rebulk.regex(build_or_pattern(prefixes) + r'-?(?P<part>' + numeral + r')',
prefixes=prefixes, validate_all=True, private_parent=True, children=True, formatter=parse_numeral,
validator={'part': compose(validate_roman, lambda m: 0 < m.value < 100)})
validator={'part': and_(validate_roman, lambda m: 0 < m.value < 100)})
return rebulk

View file

@ -9,8 +9,8 @@ from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch
from rebulk.match import Match
from ..common import seps
from ..common.expected import build_expected_function
from ..common.comparators import marker_sorted
from ..common.expected import build_expected_function
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import int_coercable, seps_surround
@ -50,7 +50,7 @@ def release_group(config):
if string.lower().endswith(forbidden) and string[-len(forbidden) - 1:-len(forbidden)] in seps:
string = string[:len(forbidden)]
string = string.strip(groupname_seps)
return string
return string.strip()
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'release_group'))
@ -72,7 +72,9 @@ _scene_previous_names = ('video_codec', 'source', 'video_api', 'audio_codec', 'a
'audio_channels', 'screen_size', 'other', 'container', 'language', 'subtitle_language',
'subtitle_language.suffix', 'subtitle_language.prefix', 'language.suffix')
_scene_previous_tags = ('release-group-prefix', )
_scene_previous_tags = ('release-group-prefix',)
_scene_no_previous_tags = ('no-release-group-prefix',)
class DashSeparatedReleaseGroup(Rule):
@ -193,7 +195,8 @@ class DashSeparatedReleaseGroup(Rule):
if releasegroup.value:
to_append.append(releasegroup)
return to_remove, to_append
if to_remove or to_append:
return to_remove, to_append
class SceneReleaseGroup(Rule):
@ -212,6 +215,17 @@ class SceneReleaseGroup(Rule):
super(SceneReleaseGroup, self).__init__()
self.value_formatter = value_formatter
@staticmethod
def is_previous_match(match):
    """
    Check if match can precede release_group.

    A match whose name is listed in ``_scene_previous_names`` qualifies unless
    ``match.tagged`` reports one of ``_scene_no_previous_tags``; any other match
    qualifies only when ``match.tagged`` reports one of ``_scene_previous_tags``.

    :param match: match to test
    :return: truthy when the match can precede a release_group
    """
    if match.name in _scene_previous_names:
        return not match.tagged(*_scene_no_previous_tags)
    return match.tagged(*_scene_previous_tags)
def when(self, matches, context): # pylint:disable=too-many-locals
# If a release_group is found before, ignore this kind of release_group rule.
@ -253,13 +267,12 @@ class SceneReleaseGroup(Rule):
if match.start < filepart.start:
return False
return not match.private or match.name in _scene_previous_names
return not match.private or self.is_previous_match(match)
previous_match = matches.previous(last_hole,
previous_match_filter,
index=0)
if previous_match and (previous_match.name in _scene_previous_names or
any(tag in previous_match.tags for tag in _scene_previous_tags)) and \
if previous_match and (self.is_previous_match(previous_match)) and \
not matches.input_string[previous_match.end:last_hole.start].strip(seps) \
and not int_coercable(last_hole.value.strip(seps)):
@ -300,11 +313,11 @@ class AnimeReleaseGroup(Rule):
# If a release_group is found before, ignore this kind of release_group rule.
if matches.named('release_group'):
return to_remove, to_append
return False
if not matches.named('episode') and not matches.named('season') and matches.named('release_group'):
# This doesn't seems to be an anime, and we already found another release_group.
return to_remove, to_append
return False
for filepart in marker_sorted(matches.markers.named('path'), matches):
@ -328,4 +341,7 @@ class AnimeReleaseGroup(Rule):
to_append.append(group)
to_remove.extend(matches.range(empty_group.start, empty_group.end,
lambda m: 'weak-language' in m.tags))
return to_remove, to_append
if to_remove or to_append:
return to_remove, to_append
return False

View file

@ -24,8 +24,8 @@ def screen_size(config):
:return: Created Rebulk object
:rtype: Rebulk
"""
interlaced = frozenset({res for res in config['interlaced']})
progressive = frozenset({res for res in config['progressive']})
interlaced = frozenset(config['interlaced'])
progressive = frozenset(config['progressive'])
frame_rates = [re.escape(rate) for rate in config['frame_rates']]
min_ar = config['min_ar']
max_ar = config['max_ar']

View file

@ -12,7 +12,7 @@ from rebulk import AppendMatch, Rebulk, RemoveMatch, Rule
from .audio_codec import HqConflictRule
from ..common import dash, seps
from ..common.pattern import is_disabled
from ..common.validators import seps_before, seps_after
from ..common.validators import seps_before, seps_after, or_
def source(config): # pylint:disable=unused-argument
@ -26,7 +26,10 @@ def source(config): # pylint:disable=unused-argument
"""
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'source'))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash], private_parent=True, children=True)
rebulk.defaults(name='source', tags=['video-codec-prefix', 'streaming_service.suffix'])
rebulk = rebulk.defaults(name='source',
tags=['video-codec-prefix', 'streaming_service.suffix'],
validate_all=True,
validator={'__parent__': or_(seps_before, seps_after)})
rip_prefix = '(?P<other>Rip)-?'
rip_suffix = '-?(?P<other>Rip)'
@ -42,7 +45,7 @@ def source(config): # pylint:disable=unused-argument
def demote_other(match, other): # pylint: disable=unused-argument
"""Default conflict solver with 'other' property."""
return other if other.name == 'other' else '__default__'
return other if other.name == 'other' or other.name == 'release_group' else '__default__'
rebulk.regex(*build_source_pattern('VHS', suffix=rip_optional_suffix),
value={'source': 'VHS', 'other': 'Rip'})
@ -92,8 +95,9 @@ def source(config): # pylint:disable=unused-argument
# WEBCap is a synonym to WEBRip, mostly used by non english
rebulk.regex(*build_source_pattern('WEB-?(?P<another>Cap)', suffix=rip_optional_suffix),
value={'source': 'Web', 'other': 'Rip', 'another': 'Rip'})
rebulk.regex(*build_source_pattern('WEB-?DL', 'WEB-?U?HD', 'WEB', 'DL-?WEB', 'DL(?=-?Mux)'),
rebulk.regex(*build_source_pattern('WEB-?DL', 'WEB-?U?HD', 'DL-?WEB', 'DL(?=-?Mux)'),
value={'source': 'Web'})
rebulk.regex('(WEB)', value='Web', tags='weak.source')
rebulk.regex(*build_source_pattern('HD-?DVD', suffix=rip_optional_suffix),
value={'source': 'HD-DVD', 'other': 'Rip'})
@ -118,7 +122,7 @@ def source(config): # pylint:disable=unused-argument
rebulk.regex(*build_source_pattern('DSR?', 'SAT', suffix=rip_suffix),
value={'source': 'Satellite', 'other': 'Rip'})
rebulk.rules(ValidateSource, UltraHdBlurayRule)
rebulk.rules(ValidateSourcePrefixSuffix, ValidateWeakSource, UltraHdBlurayRule)
return rebulk
@ -170,32 +174,62 @@ class UltraHdBlurayRule(Rule):
to_remove.append(match)
to_append.append(new_source)
return to_remove, to_append
if to_remove or to_append:
return to_remove, to_append
return False
class ValidateSource(Rule):
class ValidateSourcePrefixSuffix(Rule):
"""
Validate source with screener property, with video_codec property or separated
Validate source with source prefix, source suffix.
"""
priority = 64
consequence = RemoveMatch
def when(self, matches, context):
ret = []
for match in matches.named('source'):
match = match.initiator
if not seps_before(match) and \
not matches.range(match.start - 1, match.start - 2,
lambda m: 'source-prefix' in m.tags):
if match.children:
ret.extend(match.children)
ret.append(match)
continue
if not seps_after(match) and \
not matches.range(match.end, match.end + 1,
lambda m: 'source-suffix' in m.tags):
if match.children:
ret.extend(match.children)
ret.append(match)
continue
for filepart in matches.markers.named('path'):
for match in matches.range(filepart.start, filepart.end, predicate=lambda m: m.name == 'source'):
match = match.initiator
if not seps_before(match) and \
not matches.range(match.start - 1, match.start - 2,
lambda m: 'source-prefix' in m.tags):
if match.children:
ret.extend(match.children)
ret.append(match)
continue
if not seps_after(match) and \
not matches.range(match.end, match.end + 1,
lambda m: 'source-suffix' in m.tags):
if match.children:
ret.extend(match.children)
ret.append(match)
continue
return ret
class ValidateWeakSource(Rule):
    """
    Validate sources tagged 'weak.source'.

    Declared as depending on ValidateSourcePrefixSuffix. A weak source is selected
    for removal (together with its children, if any) when another source match lies
    after it in the same path filepart and a hole that is not made only of
    separators lies before it.
    """
    consequence = RemoveMatch
    priority = 64
    dependency = [ValidateSourcePrefixSuffix]

    def when(self, matches, context):
        """
        Collect weak source matches that are most likely part of the title.

        :param matches: matches container to inspect
        :param context: rule context (not used here)
        :return: list of matches to hand to the consequence
        """
        def is_source(candidate):
            return candidate.name == 'source'

        def has_content(hole):
            return hole.value.strip(seps)

        discarded = []
        for filepart in matches.markers.named('path'):
            for source_match in matches.range(filepart.start, filepart.end, predicate=is_source):
                # A weak source followed by another source in this filepart, with a non-empty
                # hole (title candidate) before it, is most likely part of the title.
                if 'weak.source' not in source_match.tags:
                    continue
                if not matches.range(source_match.end, filepart.end, predicate=is_source):
                    continue
                if not matches.holes(filepart.start, source_match.start, predicate=has_content, index=-1):
                    continue
                if source_match.children:
                    discarded.extend(source_match.children)
                discarded.append(source_match)
        return discarded

View file

@ -25,133 +25,13 @@ def streaming_service(config): # pylint: disable=too-many-statements,unused-arg
rebulk = rebulk.string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
rebulk.defaults(name='streaming_service', tags=['source-prefix'])
rebulk.string('AE', 'A&E', value='A&E')
rebulk.string('AMBC', value='ABC')
rebulk.string('AUBC', value='ABC Australia')
rebulk.string('AJAZ', value='Al Jazeera English')
rebulk.string('AMC', value='AMC')
rebulk.string('AMZN', 'Amazon', value='Amazon Prime')
rebulk.regex('Amazon-?Prime', value='Amazon Prime')
rebulk.string('AS', value='Adult Swim')
rebulk.regex('Adult-?Swim', value='Adult Swim')
rebulk.string('ATK', value="America's Test Kitchen")
rebulk.string('ANPL', value='Animal Planet')
rebulk.string('ANLB', value='AnimeLab')
rebulk.string('AOL', value='AOL')
rebulk.string('ARD', value='ARD')
rebulk.string('iP', value='BBC iPlayer')
rebulk.regex('BBC-?iPlayer', value='BBC iPlayer')
rebulk.string('BRAV', value='BravoTV')
rebulk.string('CNLP', value='Canal+')
rebulk.string('CN', value='Cartoon Network')
rebulk.string('CBC', value='CBC')
rebulk.string('CBS', value='CBS')
rebulk.string('CNBC', value='CNBC')
rebulk.string('CC', value='Comedy Central')
rebulk.string('4OD', value='Channel 4')
rebulk.string('CHGD', value='CHRGD')
rebulk.string('CMAX', value='Cinemax')
rebulk.string('CMT', value='Country Music Television')
rebulk.regex('Comedy-?Central', value='Comedy Central')
rebulk.string('CCGC', value='Comedians in Cars Getting Coffee')
rebulk.string('CR', value='Crunchy Roll')
rebulk.string('CRKL', value='Crackle')
rebulk.regex('Crunchy-?Roll', value='Crunchy Roll')
rebulk.string('CSPN', value='CSpan')
rebulk.string('CTV', value='CTV')
rebulk.string('CUR', value='CuriosityStream')
rebulk.string('CWS', value='CWSeed')
rebulk.string('DSKI', value='Daisuki')
rebulk.string('DHF', value='Deadhouse Films')
rebulk.string('DDY', value='Digiturk Diledigin Yerde')
rebulk.string('DISC', 'Discovery', value='Discovery')
rebulk.string('DSNY', 'Disney', value='Disney')
rebulk.string('DIY', value='DIY Network')
rebulk.string('DOCC', value='Doc Club')
rebulk.string('DPLY', value='DPlay')
rebulk.string('ETV', value='E!')
rebulk.string('EPIX', value='ePix')
rebulk.string('ETTV', value='El Trece')
rebulk.string('ESPN', value='ESPN')
rebulk.string('ESQ', value='Esquire')
rebulk.string('FAM', value='Family')
rebulk.string('FJR', value='Family Jr')
rebulk.string('FOOD', value='Food Network')
rebulk.string('FOX', value='Fox')
rebulk.string('FREE', value='Freeform')
rebulk.string('FYI', value='FYI Network')
rebulk.string('GLBL', value='Global')
rebulk.string('GLOB', value='GloboSat Play')
rebulk.string('HLMK', value='Hallmark')
rebulk.string('HBO', value='HBO Go')
rebulk.regex('HBO-?Go', value='HBO Go')
rebulk.string('HGTV', value='HGTV')
rebulk.string('HIST', 'History', value='History')
rebulk.string('HULU', value='Hulu')
rebulk.string('ID', value='Investigation Discovery')
rebulk.string('IFC', value='IFC')
rebulk.string('iTunes', 'iT', value='iTunes')
rebulk.string('ITV', value='ITV')
rebulk.string('KNOW', value='Knowledge Network')
rebulk.string('LIFE', value='Lifetime')
rebulk.string('MTOD', value='Motor Trend OnDemand')
rebulk.string('MNBC', value='MSNBC')
rebulk.string('MTV', value='MTV')
rebulk.string('NATG', value='National Geographic')
rebulk.regex('National-?Geographic', value='National Geographic')
rebulk.string('NBA', value='NBA TV')
rebulk.regex('NBA-?TV', value='NBA TV')
rebulk.string('NBC', value='NBC')
rebulk.string('NF', 'Netflix', value='Netflix')
rebulk.string('NFL', value='NFL')
rebulk.string('NFLN', value='NFL Now')
rebulk.string('GC', value='NHL GameCenter')
rebulk.string('NICK', 'Nickelodeon', value='Nickelodeon')
rebulk.string('NRK', value='Norsk Rikskringkasting')
rebulk.string('PBS', value='PBS')
rebulk.string('PBSK', value='PBS Kids')
rebulk.string('PSN', value='Playstation Network')
rebulk.string('PLUZ', value='Pluzz')
rebulk.string('RTE', value='RTE One')
rebulk.string('SBS', value='SBS (AU)')
rebulk.string('SESO', 'SeeSo', value='SeeSo')
rebulk.string('SHMI', value='Shomi')
rebulk.string('SPIK', value='Spike')
rebulk.string('SPKE', value='Spike TV')
rebulk.regex('Spike-?TV', value='Spike TV')
rebulk.string('SNET', value='Sportsnet')
rebulk.string('SPRT', value='Sprout')
rebulk.string('STAN', value='Stan')
rebulk.string('STZ', value='Starz')
rebulk.string('SVT', value='Sveriges Television')
rebulk.string('SWER', value='SwearNet')
rebulk.string('SYFY', value='Syfy')
rebulk.string('TBS', value='TBS')
rebulk.string('TFOU', value='TFou')
rebulk.string('CW', value='The CW')
rebulk.regex('The-?CW', value='The CW')
rebulk.string('TLC', value='TLC')
rebulk.string('TUBI', value='TubiTV')
rebulk.string('TV3', value='TV3 Ireland')
rebulk.string('TV4', value='TV4 Sweeden')
rebulk.string('TVL', value='TV Land')
rebulk.regex('TV-?Land', value='TV Land')
rebulk.string('UFC', value='UFC')
rebulk.string('UKTV', value='UKTV')
rebulk.string('UNIV', value='Univision')
rebulk.string('USAN', value='USA Network')
rebulk.string('VLCT', value='Velocity')
rebulk.string('VH1', value='VH1')
rebulk.string('VICE', value='Viceland')
rebulk.string('VMEO', value='Vimeo')
rebulk.string('VRV', value='VRV')
rebulk.string('WNET', value='W Network')
rebulk.string('WME', value='WatchMe')
rebulk.string('WWEN', value='WWE Network')
rebulk.string('XBOX', value='Xbox Video')
rebulk.string('YHOO', value='Yahoo')
rebulk.string('RED', value='YouTube Red')
rebulk.string('ZDF', value='ZDF')
# Register every configured pattern: each config key is the value reported for the match,
# and it maps either to a single pattern or to a list of patterns.
for value, items in config.items():
    patterns = items if isinstance(items, list) else [items]
    for pattern in patterns:
        if pattern.startswith('re:'):
            # 're:' only marks the entry as a regular expression. Strip it before registering,
            # otherwise the compiled expression would require the literal text "re:" in the input.
            rebulk.regex(pattern[len('re:'):], value=value)
        else:
            rebulk.string(pattern, value=value)
rebulk.rules(ValidateStreamingService)
@ -161,7 +41,7 @@ def streaming_service(config): # pylint: disable=too-many-statements,unused-arg
class ValidateStreamingService(Rule):
"""Validate streaming service matches."""
priority = 32
priority = 128
consequence = RemoveMatch
def when(self, matches, context):

View file

@ -8,7 +8,12 @@ from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, AppendTags
from rebulk.formatters import formatters
from .film import FilmTitleRule
from .language import SubtitlePrefixLanguageRule, SubtitleSuffixLanguageRule, SubtitleExtensionRule
from .language import (
SubtitlePrefixLanguageRule,
SubtitleSuffixLanguageRule,
SubtitleExtensionRule,
NON_SPECIFIC_LANGUAGES
)
from ..common import seps, title_seps
from ..common.comparators import marker_sorted
from ..common.expected import build_expected_function
@ -88,12 +93,19 @@ class TitleBaseRule(Rule):
:rtype:
"""
cropped_holes = []
group_markers = matches.markers.named('group')
for group_marker in group_markers:
path_marker = matches.markers.at_match(group_marker, predicate=lambda m: m.name == 'path', index=0)
if path_marker and path_marker.span == group_marker.span:
group_markers.remove(group_marker)
for hole in holes:
group_markers = matches.markers.named('group')
cropped_holes.extend(hole.crop(group_markers))
return cropped_holes
def is_ignored(self, match):
@staticmethod
def is_ignored(match):
"""
Ignore matches when scanning for title (hole).
@ -130,7 +142,8 @@ class TitleBaseRule(Rule):
for outside in outside_matches:
other_languages.extend(matches.range(outside.start, outside.end,
lambda c_match: c_match.name == match.name and
c_match not in to_keep))
c_match not in to_keep and
c_match.value not in NON_SPECIFIC_LANGUAGES))
if not other_languages and (not starting or len(match.raw) <= 3):
return True
@ -239,7 +252,7 @@ class TitleBaseRule(Rule):
to_remove = []
if matches.named(self.match_name, lambda match: 'expected' in match.tags):
return ret, to_remove
return False
fileparts = [filepart for filepart in list(marker_sorted(matches.markers.named('path'), matches))
if not self.filepart_filter or self.filepart_filter(filepart, matches)]
@ -272,7 +285,9 @@ class TitleBaseRule(Rule):
ret.extend(titles)
to_remove.extend(to_remove_c)
return ret, to_remove
if ret or to_remove:
return ret, to_remove
return False
class TitleFromPosition(TitleBaseRule):
@ -329,4 +344,6 @@ class PreferTitleWithYear(Rule):
for title_match in titles:
if title_match.value not in title_values:
to_remove.append(title_match)
return to_remove, to_tag
if to_remove or to_tag:
return to_remove, to_tag
return False

View file

@ -3,9 +3,8 @@
"""
video_codec and video_profile property
"""
from rebulk.remodule import re
from rebulk import Rebulk, Rule, RemoveMatch
from rebulk.remodule import re
from ..common import dash
from ..common.pattern import is_disabled
@ -43,7 +42,8 @@ def video_codec(config): # pylint:disable=unused-argument
# http://blog.mediacoderhq.com/h264-profiles-and-levels/
# https://en.wikipedia.org/wiki/H.264/MPEG-4_AVC
rebulk.defaults(name="video_profile",
rebulk.defaults(clear=True,
name="video_profile",
validator=seps_surround,
disabled=lambda context: is_disabled(context, 'video_profile'))
@ -66,7 +66,8 @@ def video_codec(config): # pylint:disable=unused-argument
rebulk.string('DXVA', value='DXVA', name='video_api',
disabled=lambda context: is_disabled(context, 'video_api'))
rebulk.defaults(name='color_depth',
rebulk.defaults(clear=True,
name='color_depth',
validator=seps_surround,
disabled=lambda context: is_disabled(context, 'color_depth'))
rebulk.regex('12.?bits?', value='12-bit')

View file

@ -67,7 +67,7 @@ def website(config):
"""
Validator for next website matches
"""
return any(name in ['season', 'episode', 'year'] for name in match.names)
return match.named('season', 'episode', 'year')
def when(self, matches, context):
to_remove = []
@ -80,7 +80,9 @@ def website(config):
if not safe:
suffix = matches.next(website_match, PreferTitleOverWebsite.valid_followers, 0)
if suffix:
to_remove.append(website_match)
group = matches.markers.at_match(website_match, lambda marker: marker.name == 'group', 0)
if not group:
to_remove.append(website_match)
return to_remove
rebulk.rules(PreferTitleOverWebsite, ValidateWebsitePrefix)