Update vendored guessit to 3.1.1

Updates python-dateutil to 2.8.2. Updates rebulk to 2.0.1.
2025-08-14 18:47:09 -07:00 · 2022-11-28 19:44:46 -05:00 · 2022-11-28 19:44:46 -05:00 · 2226a74ef8
commit 2226a74ef8
parent ebc9718117
66 changed files with 2995 additions and 1306 deletions
--- a/libs/common/guessit/rules/common/formatters.py
+++ b/libs/common/guessit/rules/common/formatters.py
@ -25,7 +25,7 @@ def _potential_before(i, input_string):
    :return:
    :rtype: bool
    """
-    return i - 2 >= 0 and input_string[i] in seps and input_string[i - 2] in seps and input_string[i - 1] not in seps
+    return i - 1 >= 0 and input_string[i] in seps and input_string[i - 2] in seps and input_string[i - 1] not in seps


 def _potential_after(i, input_string):
--- a/libs/common/guessit/rules/common/validators.py
+++ b/libs/common/guessit/rules/common/validators.py
@ -28,7 +28,7 @@ def int_coercable(string):
        return False


-def compose(*validators):
+def and_(*validators):
    """
    Compose validators functions
    :param validators:
@ -49,3 +49,26 @@ def compose(*validators):
                return False
        return True
    return composed
+
+
+def or_(*validators):
+    """
+    Combine validator functions with a logical OR.
+
+    :param validators: callables invoked with the value under validation
+    :type validators: callable
+    :return: function that is True as soon as one validator accepts
+    :rtype: callable
+    """
+    def composed(string):
+        """
+        Accept the value when at least one composed validator accepts it.
+
+        :param string: value handed to each validator in turn
+        :type string:
+        :return: True if any validator returned a truthy result, else False
+        :rtype: bool
+        """
+        # any() stops at the first validator that accepts
+        return any(check(string) for check in validators)
+    return composed
--- a/libs/common/guessit/rules/match_processors.py
+++ b/libs/common/guessit/rules/match_processors.py
@ -0,0 +1,20 @@
+"""
+Match processors
+"""
+from guessit.rules.common import seps
+
+
+def strip(match, chars=seps):
+    """
+    Strip given characters from both ends of match, never moving one bound past the other.
+
+    :param chars: characters to strip from the match edges (defaults to seps)
+    :param match: match whose start and end are shrunk in place
+    :return: False when the stripped match is falsy (nothing left), None otherwise
+    """
+    while match.start < match.end and match.input_string[match.start] in chars:
+        match.start += 1
+    while match.end > match.start and match.input_string[match.end - 1] in chars:
+        match.end -= 1
+    if not match:
+        return False
--- a/libs/common/guessit/rules/processors.py
+++ b/libs/common/guessit/rules/processors.py
@ -34,7 +34,9 @@ class EnlargeGroupMatches(CustomRule):
            for match in matches.ending(group.end - 1):
                ending.append(match)

-        return starting, ending
+        if starting or ending:
+            return starting, ending
+        return False

    def then(self, matches, when_response, context):
        starting, ending = when_response
--- a/libs/common/guessit/rules/properties/audio_codec.py
+++ b/libs/common/guessit/rules/properties/audio_codec.py
@ -3,9 +3,8 @@
 """
 audio_codec, audio_profile and audio_channels property
 """
-from rebulk.remodule import re
-
 from rebulk import Rebulk, Rule, RemoveMatch
+from rebulk.remodule import re

 from ..common import dash
 from ..common.pattern import is_disabled
@ -23,7 +22,9 @@ def audio_codec(config):  # pylint:disable=unused-argument
    :return: Created Rebulk object
    :rtype: Rebulk
    """
-    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
+    rebulk = Rebulk()\
+        .regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])\
+        .string_defaults(ignore_case=True)

    def audio_codec_priority(match1, match2):
        """
@ -61,7 +62,9 @@ def audio_codec(config):  # pylint:disable=unused-argument
    rebulk.string('PCM', value='PCM')
    rebulk.string('LPCM', value='LPCM')

-    rebulk.defaults(name='audio_profile', disabled=lambda context: is_disabled(context, 'audio_profile'))
+    rebulk.defaults(clear=True,
+                    name='audio_profile',
+                    disabled=lambda context: is_disabled(context, 'audio_profile'))
    rebulk.string('MA', value='Master Audio', tags=['audio_profile.rule', 'DTS-HD'])
    rebulk.string('HR', 'HRA', value='High Resolution Audio', tags=['audio_profile.rule', 'DTS-HD'])
    rebulk.string('ES', value='Extended Surround', tags=['audio_profile.rule', 'DTS'])
@ -70,17 +73,19 @@ def audio_codec(config):  # pylint:disable=unused-argument
    rebulk.string('HQ', value='High Quality', tags=['audio_profile.rule', 'Dolby Digital'])
    rebulk.string('EX', value='EX', tags=['audio_profile.rule', 'Dolby Digital'])

-    rebulk.defaults(name="audio_channels", disabled=lambda context: is_disabled(context, 'audio_channels'))
-    rebulk.regex(r'(7[\W_][01](?:ch)?)(?=[^\d]|$)', value='7.1', children=True)
-    rebulk.regex(r'(5[\W_][01](?:ch)?)(?=[^\d]|$)', value='5.1', children=True)
-    rebulk.regex(r'(2[\W_]0(?:ch)?)(?=[^\d]|$)', value='2.0', children=True)
+    rebulk.defaults(clear=True,
+                    name="audio_channels",
+                    disabled=lambda context: is_disabled(context, 'audio_channels'))
    rebulk.regex('7[01]', value='7.1', validator=seps_after, tags='weak-audio_channels')
    rebulk.regex('5[01]', value='5.1', validator=seps_after, tags='weak-audio_channels')
    rebulk.string('20', value='2.0', validator=seps_after, tags='weak-audio_channels')
-    rebulk.string('7ch', '8ch', value='7.1')
-    rebulk.string('5ch', '6ch', value='5.1')
-    rebulk.string('2ch', 'stereo', value='2.0')
-    rebulk.string('1ch', 'mono', value='1.0')
+
+    for value, items in config.get('audio_channels').items():
+        for item in items:
+            if item.startswith('re:'):
+                rebulk.regex(item[3:], value=value, children=True)
+            else:
+                rebulk.string(item, value=value)

    rebulk.rules(DtsHDRule, DtsRule, AacRule, DolbyDigitalRule, AudioValidatorRule, HqConflictRule,
                 AudioChannelsValidatorRule)
--- a/libs/common/guessit/rules/properties/bit_rate.py
+++ b/libs/common/guessit/rules/properties/bit_rate.py
@ -69,4 +69,6 @@ class BitRateTypeRule(Rule):
                    else:
                        to_rename.append(match)

-        return to_rename, to_remove
+        if to_rename or to_remove:
+            return to_rename, to_remove
+        return False
--- a/libs/common/guessit/rules/properties/bonus.py
+++ b/libs/common/guessit/rules/properties/bonus.py
@ -26,7 +26,8 @@ def bonus(config):  # pylint:disable=unused-argument
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)

    rebulk.regex(r'x(\d+)', name='bonus', private_parent=True, children=True, formatter=int,
-                 validator={'__parent__': lambda match: seps_surround},
+                 validator={'__parent__': seps_surround},
+                 validate_all=True,
                 conflict_solver=lambda match, conflicting: match
                 if conflicting.name in ('video_codec', 'episode') and 'weak-episode' not in conflicting.tags
                 else '__default__')
--- a/libs/common/guessit/rules/properties/container.py
+++ b/libs/common/guessit/rules/properties/container.py
@ -44,7 +44,8 @@ def container(config):
    rebulk.regex(r'\.'+build_or_pattern(torrent)+'$', exts=torrent, tags=['extension', 'torrent'])
    rebulk.regex(r'\.'+build_or_pattern(nzb)+'$', exts=nzb, tags=['extension', 'nzb'])

-    rebulk.defaults(name='container',
+    rebulk.defaults(clear=True,
+                    name='container',
                    validator=seps_surround,
                    formatter=lambda s: s.lower(),
                    conflict_solver=lambda match, other: match
--- a/libs/common/guessit/rules/properties/episode_title.py
+++ b/libs/common/guessit/rules/properties/episode_title.py
@ -10,6 +10,7 @@ from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, RenameMatch, POST_PRO
 from ..common import seps, title_seps
 from ..common.formatters import cleanup
 from ..common.pattern import is_disabled
+from ..common.validators import or_
 from ..properties.title import TitleFromPosition, TitleBaseRule
 from ..properties.type import TypeProcessor

@ -133,8 +134,7 @@ class EpisodeTitleFromPosition(TitleBaseRule):

    def hole_filter(self, hole, matches):
        episode = matches.previous(hole,
-                                   lambda previous: any(name in previous.names
-                                                        for name in self.previous_names),
+                                   lambda previous: previous.named(*self.previous_names),
                                   0)

        crc32 = matches.named('crc32')
@ -179,8 +179,7 @@ class AlternativeTitleReplace(Rule):
                                              predicate=lambda match: 'title' in match.tags, index=0)
            if main_title:
                episode = matches.previous(main_title,
-                                           lambda previous: any(name in previous.names
-                                                                for name in self.previous_names),
+                                           lambda previous: previous.named(*self.previous_names),
                                           0)

                crc32 = matches.named('crc32')
@ -249,7 +248,7 @@ class Filepart3EpisodeTitle(Rule):

            if season:
                hole = matches.holes(subdirectory.start, subdirectory.end,
-                                     ignore=lambda match: 'weak-episode' in match.tags,
+                                     ignore=or_(lambda match: 'weak-episode' in match.tags, TitleBaseRule.is_ignored),
                                     formatter=cleanup, seps=title_seps, predicate=lambda match: match.value,
                                     index=0)
                if hole:
@ -292,7 +291,8 @@ class Filepart2EpisodeTitle(Rule):
            season = (matches.range(directory.start, directory.end, lambda match: match.name == 'season', 0) or
                      matches.range(filename.start, filename.end, lambda match: match.name == 'season', 0))
            if season:
-                hole = matches.holes(directory.start, directory.end, ignore=lambda match: 'weak-episode' in match.tags,
+                hole = matches.holes(directory.start, directory.end,
+                                     ignore=or_(lambda match: 'weak-episode' in match.tags, TitleBaseRule.is_ignored),
                                     formatter=cleanup, seps=title_seps,
                                     predicate=lambda match: match.value, index=0)
                if hole:
--- a/libs/common/guessit/rules/properties/episodes.py
+++ b/libs/common/guessit/rules/properties/episodes.py
@ -11,12 +11,13 @@ from rebulk.match import Match
 from rebulk.remodule import re
 from rebulk.utils import is_iterable

+from guessit.rules import match_processors
+from guessit.rules.common.numeral import parse_numeral, numeral
 from .title import TitleFromPosition
 from ..common import dash, alt_dash, seps, seps_no_fs
 from ..common.formatters import strip
-from ..common.numeral import numeral, parse_numeral
 from ..common.pattern import is_disabled
-from ..common.validators import compose, seps_surround, seps_before, int_coercable
+from ..common.validators import seps_surround, int_coercable, and_
 from ...reutils import build_or_pattern


@ -29,17 +30,12 @@ def episodes(config):
    :return: Created Rebulk object
    :rtype: Rebulk
    """
+
    # pylint: disable=too-many-branches,too-many-statements,too-many-locals
    def is_season_episode_disabled(context):
        """Whether season and episode rules should be enabled."""
        return is_disabled(context, 'episode') or is_disabled(context, 'season')

-    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
-    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'])
-
-    episode_max_range = config['episode_max_range']
-    season_max_range = config['season_max_range']
-
    def episodes_season_chain_breaker(matches):
        """
        Break chains if there's more than 100 offset between two neighbor values.
@ -57,8 +53,6 @@ def episodes(config):
            return True
        return False

-    rebulk.chain_defaults(chain_breaker=episodes_season_chain_breaker)
-
    def season_episode_conflict_solver(match, other):
        """
        Conflict solver for episode/season patterns
@ -76,7 +70,6 @@ def episodes(config):
                if (other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags
                        and not match.initiator.children.named(match.name + 'Marker')) or (
                            other.name == 'screen_size' and not int_coercable(other.raw)):
-
                    return match
                if other.name in ('season', 'episode') and match.initiator != other.initiator:
                    if (match.initiator.name in ('weak_episode', 'weak_duplicate')
@ -87,21 +80,6 @@ def episodes(config):
                            return current
        return '__default__'

-    season_words = config['season_words']
-    episode_words = config['episode_words']
-    of_words = config['of_words']
-    all_words = config['all_words']
-    season_markers = config['season_markers']
-    season_ep_markers = config['season_ep_markers']
-    disc_markers = config['disc_markers']
-    episode_markers = config['episode_markers']
-    range_separators = config['range_separators']
-    weak_discrete_separators = list(sep for sep in seps_no_fs if sep not in range_separators)
-    strong_discrete_separators = config['discrete_separators']
-    discrete_separators = strong_discrete_separators + weak_discrete_separators
-
-    max_range_gap = config['max_range_gap']
-
    def ordering_validator(match):
        """
        Validator for season list. They should be in natural order to be validated.
@ -135,65 +113,18 @@ def episodes(config):
                                            lambda m: m.name == property_name + 'Separator')
                    separator = match.children.previous(current_match,
                                                        lambda m: m.name == property_name + 'Separator', 0)
-                    if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
-                        if not 0 < current_match.value - previous_match.value <= max_range_gap + 1:
-                            valid = False
-                    if separator.raw in strong_discrete_separators:
-                        valid = True
-                        break
+                    if separator:
+                        if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
+                            if not 0 < current_match.value - previous_match.value <= max_range_gap + 1:
+                                valid = False
+                        if separator.raw in strong_discrete_separators:
+                            valid = True
+                            break
                previous_match = current_match
            return valid

        return is_consecutive('episode') and is_consecutive('season')

-    # S01E02, 01x02, S01S02S03
-    rebulk.chain(formatter={'season': int, 'episode': int},
-                 tags=['SxxExx'],
-                 abbreviations=[alt_dash],
-                 children=True,
-                 private_parent=True,
-                 validate_all=True,
-                 validator={'__parent__': ordering_validator},
-                 conflict_solver=season_episode_conflict_solver,
-                 disabled=is_season_episode_disabled) \
-        .regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)@?' +
-               build_or_pattern(episode_markers + disc_markers, name='episodeMarker') + r'@?(?P<episode>\d+)',
-               validate_all=True,
-               validator={'__parent__': seps_before}).repeater('+') \
-        .regex(build_or_pattern(episode_markers + disc_markers + discrete_separators + range_separators,
-                                name='episodeSeparator',
-                                escape=True) +
-               r'(?P<episode>\d+)').repeater('*') \
-        .chain() \
-        .regex(r'(?P<season>\d+)@?' +
-               build_or_pattern(season_ep_markers, name='episodeMarker') +
-               r'@?(?P<episode>\d+)',
-               validate_all=True,
-               validator={'__parent__': seps_before}) \
-        .chain() \
-        .regex(r'(?P<season>\d+)@?' +
-               build_or_pattern(season_ep_markers, name='episodeMarker') +
-               r'@?(?P<episode>\d+)',
-               validate_all=True,
-               validator={'__parent__': seps_before}) \
-        .regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
-                                name='episodeSeparator',
-                                escape=True) +
-               r'(?P<episode>\d+)').repeater('*') \
-        .chain() \
-        .regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)',
-               validate_all=True,
-               validator={'__parent__': seps_before}) \
-        .regex(build_or_pattern(season_markers + discrete_separators + range_separators,
-                                name='seasonSeparator',
-                                escape=True) +
-               r'(?P<season>\d+)').repeater('*')
-
-    # episode_details property
-    for episode_detail in ('Special', 'Pilot', 'Unaired', 'Final'):
-        rebulk.string(episode_detail, value=episode_detail, name='episode_details',
-                      disabled=lambda context: is_disabled(context, 'episode_details'))
-
    def validate_roman(match):
        """
        Validate a roman match if surrounded by separators
@ -206,117 +137,203 @@ def episodes(config):
            return True
        return seps_surround(match)

+    season_words = config['season_words']
+    episode_words = config['episode_words']
+    of_words = config['of_words']
+    all_words = config['all_words']
+    season_markers = config['season_markers']
+    season_ep_markers = config['season_ep_markers']
+    disc_markers = config['disc_markers']
+    episode_markers = config['episode_markers']
+    range_separators = config['range_separators']
+    weak_discrete_separators = list(sep for sep in seps_no_fs if sep not in range_separators)
+    strong_discrete_separators = config['discrete_separators']
+    discrete_separators = strong_discrete_separators + weak_discrete_separators
+    episode_max_range = config['episode_max_range']
+    season_max_range = config['season_max_range']
+    max_range_gap = config['max_range_gap']
+
+    rebulk = Rebulk() \
+        .regex_defaults(flags=re.IGNORECASE) \
+        .string_defaults(ignore_case=True) \
+        .chain_defaults(chain_breaker=episodes_season_chain_breaker) \
+        .defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
+                  formatter={'season': int, 'episode': int, 'version': int, 'count': int},
+                  children=True,
+                  private_parent=True,
+                  conflict_solver=season_episode_conflict_solver,
+                  abbreviations=[alt_dash])
+
+    # S01E02, 01x02, S01S02S03
+    rebulk.chain(
+        tags=['SxxExx'],
+        validate_all=True,
+        validator={'__parent__': and_(seps_surround, ordering_validator)},
+        disabled=is_season_episode_disabled) \
+        .defaults(tags=['SxxExx']) \
+        .regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)@?' +
+               build_or_pattern(episode_markers + disc_markers, name='episodeMarker') + r'@?(?P<episode>\d+)')\
+        .repeater('+') \
+        .regex(build_or_pattern(episode_markers + disc_markers + discrete_separators + range_separators,
+                                name='episodeSeparator',
+                                escape=True) +
+               r'(?P<episode>\d+)').repeater('*')
+
+    rebulk.chain(tags=['SxxExx'],
+                 validate_all=True,
+                 validator={'__parent__': and_(seps_surround, ordering_validator)},
+                 disabled=is_season_episode_disabled) \
+        .defaults(tags=['SxxExx']) \
+        .regex(r'(?P<season>\d+)@?' +
+               build_or_pattern(season_ep_markers, name='episodeMarker') +
+               r'@?(?P<episode>\d+)').repeater('+') \
+
+    rebulk.chain(tags=['SxxExx'],
+                 validate_all=True,
+                 validator={'__parent__': and_(seps_surround, ordering_validator)},
+                 disabled=is_season_episode_disabled) \
+        .defaults(tags=['SxxExx']) \
+        .regex(r'(?P<season>\d+)@?' +
+               build_or_pattern(season_ep_markers, name='episodeMarker') +
+               r'@?(?P<episode>\d+)') \
+        .regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
+                                name='episodeSeparator',
+                                escape=True) +
+               r'(?P<episode>\d+)').repeater('*')
+
+    rebulk.chain(tags=['SxxExx'],
+                 validate_all=True,
+                 validator={'__parent__': and_(seps_surround, ordering_validator)},
+                 disabled=is_season_episode_disabled) \
+        .defaults(tags=['SxxExx']) \
+        .regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)') \
+        .regex('(?P<other>Extras)', name='other', value='Extras', tags=['no-release-group-prefix']).repeater('?') \
+        .regex(build_or_pattern(season_markers + discrete_separators + range_separators,
+                                name='seasonSeparator',
+                                escape=True) +
+               r'(?P<season>\d+)').repeater('*')
+
+    # episode_details property
+    for episode_detail in ('Special', 'Pilot', 'Unaired', 'Final'):
+        rebulk.string(episode_detail,
+                      private_parent=False,
+                      children=False,
+                      value=episode_detail,
+                      name='episode_details',
+                      disabled=lambda context: is_disabled(context, 'episode_details'))
+
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
-                    validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True,
+                    validate_all=True,
+                    validator={'__parent__': and_(seps_surround, ordering_validator)},
+                    children=True,
+                    private_parent=True,
                    conflict_solver=season_episode_conflict_solver)

-    rebulk.chain(abbreviations=[alt_dash],
+    rebulk.chain(validate_all=True,
+                 conflict_solver=season_episode_conflict_solver,
                 formatter={'season': parse_numeral, 'count': parse_numeral},
-                 validator={'__parent__': compose(seps_surround, ordering_validator),
+                 validator={'__parent__': and_(seps_surround, ordering_validator),
                            'season': validate_roman,
                            'count': validate_roman},
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'season')) \
-        .defaults(validator=None) \
+        .defaults(formatter={'season': parse_numeral, 'count': parse_numeral},
+                  validator={'season': validate_roman, 'count': validate_roman},
+                  conflict_solver=season_episode_conflict_solver) \
        .regex(build_or_pattern(season_words, name='seasonMarker') + '@?(?P<season>' + numeral + ')') \
        .regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
        .regex(r'@?' + build_or_pattern(range_separators + discrete_separators + ['@'],
                                        name='seasonSeparator', escape=True) +
               r'@?(?P<season>\d+)').repeater('*')

+    rebulk.defaults(abbreviations=[dash])
+
    rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>\d+)' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',  # Episode 4
-                 abbreviations=[dash], formatter={'episode': int, 'version': int, 'count': int},
                 disabled=lambda context: context.get('type') == 'episode' or is_disabled(context, 'episode'))

    rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>' + numeral + ')' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',  # Episode 4
-                 abbreviations=[dash],
                 validator={'episode': validate_roman},
-                 formatter={'episode': parse_numeral, 'version': int, 'count': int},
+                 formatter={'episode': parse_numeral},
                 disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode'))

    rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')',
                 tags=['SxxExx'],
-                 abbreviations=[dash],
-                 validator=None,
-                 formatter={'season': int, 'other': lambda match: 'Complete'},
+                 formatter={'other': lambda match: 'Complete'},
                 disabled=lambda context: is_disabled(context, 'season'))

    # 12, 13
-    rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int},
+    rebulk.chain(tags=['weak-episode'],
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
-        .defaults(validator=None) \
+        .defaults(validator=None, tags=['weak-episode']) \
        .regex(r'(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
-        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*')
+        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})', abbreviations=None).repeater('*')

    # 012, 013
-    rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int},
+    rebulk.chain(tags=['weak-episode'],
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
-        .defaults(validator=None) \
+        .defaults(validator=None, tags=['weak-episode']) \
        .regex(r'0(?P<episode>\d{1,2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
-        .regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})').repeater('*')
+        .regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})', abbreviations=None).repeater('*')

    # 112, 113
    rebulk.chain(tags=['weak-episode'],
-                 formatter={'episode': int, 'version': int},
                 name='weak_episode',
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
-        .defaults(validator=None) \
+        .defaults(validator=None, tags=['weak-episode'], name='weak_episode') \
        .regex(r'(?P<episode>\d{3,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
-        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})').repeater('*')
+        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})', abbreviations=None).repeater('*')

    # 1, 2, 3
-    rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int},
+    rebulk.chain(tags=['weak-episode'],
                 disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode')) \
-        .defaults(validator=None) \
+        .defaults(validator=None, tags=['weak-episode']) \
        .regex(r'(?P<episode>\d)') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
-        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})').repeater('*')
+        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})', abbreviations=None).repeater('*')

    # e112, e113, 1e18, 3e19
-    # TODO: Enhance rebulk for validator to be used globally (season_episode_validator)
-    rebulk.chain(formatter={'season': int, 'episode': int, 'version': int},
-                 disabled=lambda context: is_disabled(context, 'episode')) \
+    rebulk.chain(disabled=lambda context: is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'(?P<season>\d{1,2})?(?P<episodeMarker>e)(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
-        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')
+        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})', abbreviations=None).repeater('*')

    # ep 112, ep113, ep112, ep113
-    rebulk.chain(abbreviations=[dash], formatter={'episode': int, 'version': int},
-                 disabled=lambda context: is_disabled(context, 'episode')) \
+    rebulk.chain(disabled=lambda context: is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'ep-?(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
-        .regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})').repeater('*')
+        .regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})', abbreviations=None).repeater('*')

    # cap 112, cap 112_114
-    rebulk.chain(abbreviations=[dash],
-                 tags=['see-pattern'],
-                 formatter={'season': int, 'episode': int},
+    rebulk.chain(tags=['see-pattern'],
                 disabled=is_season_episode_disabled) \
-        .defaults(validator=None) \
+        .defaults(validator=None, tags=['see-pattern']) \
        .regex(r'(?P<seasonMarker>cap)-?(?P<season>\d{1,2})(?P<episode>\d{2})') \
        .regex(r'(?P<episodeSeparator>-)(?P<season>\d{1,2})(?P<episode>\d{2})').repeater('?')

    # 102, 0102
    rebulk.chain(tags=['weak-episode', 'weak-duplicate'],
-                 formatter={'season': int, 'episode': int, 'version': int},
                 name='weak_duplicate',
                 conflict_solver=season_episode_conflict_solver,
                 disabled=lambda context: (context.get('episode_prefer_number', False) or
                                           context.get('type') == 'movie') or is_season_episode_disabled(context)) \
-        .defaults(validator=None) \
+        .defaults(tags=['weak-episode', 'weak-duplicate'],
+                  name='weak_duplicate',
+                  validator=None,
+                  conflict_solver=season_episode_conflict_solver) \
        .regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
-        .regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})').repeater('*')
+        .regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})', abbreviations=None).repeater('*')

-    rebulk.regex(r'v(?P<version>\d+)', children=True, private_parent=True, formatter=int,
+    rebulk.regex(r'v(?P<version>\d+)',
+                 formatter=int,
                 disabled=lambda context: is_disabled(context, 'version'))

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])
@ -325,18 +342,23 @@ def episodes(config):
    # detached of X count (season/episode)
    rebulk.regex(r'(?P<episode>\d+)-?' + build_or_pattern(of_words) +
                 r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
-                 abbreviations=[dash], children=True, private_parent=True, formatter=int,
+                 formatter=int,
+                 pre_match_processor=match_processors.strip,
                 disabled=lambda context: is_disabled(context, 'episode'))

-    rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode",
+    rebulk.regex(r'Minisodes?',
+                 children=False,
+                 private_parent=False,
+                 name='episode_format',
+                 value="Minisode",
                 disabled=lambda context: is_disabled(context, 'episode_format'))

    rebulk.rules(WeakConflictSolver, RemoveInvalidSeason, RemoveInvalidEpisode,
                 SeePatternRange(range_separators + ['_']),
                 EpisodeNumberSeparatorRange(range_separators),
-                 SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx,
-                 RemoveWeakDuplicate, EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator,
-                 RemoveWeak, RenameToAbsoluteEpisode, CountValidator, EpisodeSingleDigitValidator, RenameToDiscMatch)
+                 SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx, RemoveWeakDuplicate,
+                 EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator, RemoveWeak(episode_words),
+                 RenameToAbsoluteEpisode, CountValidator, EpisodeSingleDigitValidator, RenameToDiscMatch)

    return rebulk

@ -416,7 +438,9 @@ class WeakConflictSolver(Rule):
                if to_append:
                    to_remove.extend(weak_dup_matches)

-        return to_remove, to_append
+        if to_remove or to_append:
+            return to_remove, to_append
+        return False


 class CountValidator(Rule):
@ -442,7 +466,9 @@ class CountValidator(Rule):
                    season_count.append(count)
            else:
                to_remove.append(count)
-        return to_remove, episode_count, season_count
+        if to_remove or episode_count or season_count:
+            return to_remove, episode_count, season_count
+        return False


 class SeePatternRange(Rule):
@ -477,7 +503,9 @@ class SeePatternRange(Rule):

            to_remove.append(separator)

-        return to_remove, to_append
+        if to_remove or to_append:
+            return to_remove, to_append
+        return False


 class AbstractSeparatorRange(Rule):
@ -533,7 +561,9 @@ class AbstractSeparatorRange(Rule):

            previous_match = next_match

-        return to_remove, to_append
+        if to_remove or to_append:
+            return to_remove, to_append
+        return False


 class RenameToAbsoluteEpisode(Rule):
@ -629,20 +659,41 @@ class RemoveWeak(Rule):
    Remove weak-episode matches which appears after video, source, and audio matches.
    """
    priority = 16
-    consequence = RemoveMatch
+    consequence = RemoveMatch, AppendMatch
+
+    def __init__(self, episode_words):
+        super(RemoveWeak, self).__init__()
+        self.episode_words = episode_words

    def when(self, matches, context):
        to_remove = []
+        to_append = []
        for filepart in matches.markers.named('path'):
            weaks = matches.range(filepart.start, filepart.end, predicate=lambda m: 'weak-episode' in m.tags)
            if weaks:
-                previous = matches.previous(weaks[0], predicate=lambda m: m.name in (
+                weak = weaks[0]
+                previous = matches.previous(weak, predicate=lambda m: m.name in (
                    'audio_codec', 'screen_size', 'streaming_service', 'source', 'video_profile',
                    'audio_channels', 'audio_profile'), index=0)
                if previous and not matches.holes(
-                        previous.end, weaks[0].start, predicate=lambda m: m.raw.strip(seps)):
+                        previous.end, weak.start, predicate=lambda m: m.raw.strip(seps)):
+                    if previous.raw.lower() in self.episode_words:
+                        try:
+                            episode = copy.copy(weak)
+                            episode.name = 'episode'
+                            episode.value = int(weak.value)
+                            episode.start = previous.start
+                            episode.private = False
+                            episode.tags = []
+
+                            to_append.append(episode)
+                        except ValueError:
+                            pass
+
                    to_remove.extend(weaks)
-        return to_remove
+        if to_remove or to_append:
+            return to_remove, to_append
+        return False


 class RemoveWeakIfSxxExx(Rule):
@ -856,4 +907,6 @@ class RenameToDiscMatch(Rule):
            markers.append(marker)
            discs.extend(sorted(marker.initiator.children.named('episode'), key=lambda m: m.value))

-        return discs, markers, to_remove
+        if discs or markers or to_remove:
+            return discs, markers, to_remove
+        return False
--- a/libs/common/guessit/rules/properties/language.py
+++ b/libs/common/guessit/rules/properties/language.py
@ -72,6 +72,8 @@ def language(config, common_words):


 UNDETERMINED = babelfish.Language('und')
+MULTIPLE = babelfish.Language('mul')
+NON_SPECIFIC_LANGUAGES = frozenset([UNDETERMINED, MULTIPLE])


 class GuessitConverter(babelfish.LanguageReverseConverter):  # pylint: disable=missing-docstring
@ -388,7 +390,9 @@ class SubtitlePrefixLanguageRule(Rule):
                to_remove.extend(matches.conflicting(lang))
                if prefix in to_remove:
                    to_remove.remove(prefix)
-        return to_rename, to_remove
+        if to_rename or to_remove:
+            return to_rename, to_remove
+        return False

    def then(self, matches, when_response, context):
        to_rename, to_remove = when_response
@ -425,7 +429,9 @@ class SubtitleSuffixLanguageRule(Rule):
                to_append.append(lang)
                if suffix in to_remove:
                    to_remove.remove(suffix)
-        return to_append, to_remove
+        if to_append or to_remove:
+            return to_append, to_remove
+        return False

    def then(self, matches, when_response, context):
        to_rename, to_remove = when_response
@ -478,6 +484,7 @@ class RemoveInvalidLanguages(Rule):
    """Remove language matches that matches the blacklisted common words."""

    consequence = RemoveMatch
+    priority = 32

    def __init__(self, common_words):
        """Constructor."""
--- a/libs/common/guessit/rules/properties/other.py
+++ b/libs/common/guessit/rules/properties/other.py
@ -11,7 +11,7 @@ from rebulk.remodule import re
 from ..common import dash
 from ..common import seps
 from ..common.pattern import is_disabled
-from ..common.validators import seps_after, seps_before, seps_surround, compose
+from ..common.validators import seps_after, seps_before, seps_surround, and_
 from ...reutils import build_or_pattern
 from ...rules.common.formatters import raw_cleanup

@ -35,11 +35,16 @@ def other(config):  # pylint:disable=unused-argument,too-many-statements
    rebulk.regex('ws', 'wide-?screen', value='Widescreen')
    rebulk.regex('Re-?Enc(?:oded)?', value='Reencoded')

-    rebulk.string('Proper', 'Repack', 'Rerip', value='Proper',
+    rebulk.string('Repack', 'Rerip', value='Proper',
                  tags=['streaming_service.prefix', 'streaming_service.suffix'])
+    rebulk.string('Proper', value='Proper',
+                  tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix'])

    rebulk.regex('Real-Proper', 'Real-Repack', 'Real-Rerip', value='Proper',
                 tags=['streaming_service.prefix', 'streaming_service.suffix', 'real'])
+    rebulk.regex('Real', value='Proper',
+                 tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix', 'real'])
+
    rebulk.string('Fix', 'Fixed', value='Fix', tags=['has-neighbor-before', 'has-neighbor-after',
                                                     'streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.string('Dirfix', 'Nfofix', 'Prooffix', value='Fix',
@ -72,16 +77,18 @@ def other(config):  # pylint:disable=unused-argument,too-many-statements
                 private_names=['completeArticle', 'completeWordsBefore', 'completeWordsAfter'],
                 value={'other': 'Complete'},
                 tags=['release-group-prefix'],
-                 validator={'__parent__': compose(seps_surround, validate_complete)})
+                 validator={'__parent__': and_(seps_surround, validate_complete)})
    rebulk.string('R5', value='Region 5')
    rebulk.string('RC', value='Region C')
    rebulk.regex('Pre-?Air', value='Preair')
-    rebulk.regex('(?:PS-?)?Vita', value='PS Vita')
+    rebulk.regex('(?:PS-?)Vita', value='PS Vita')
+    rebulk.regex('Vita', value='PS Vita', tags='has-neighbor')
    rebulk.regex('(HD)(?P<another>Rip)', value={'other': 'HD', 'another': 'Rip'},
                 private_parent=True, children=True, validator={'__parent__': seps_surround}, validate_all=True)

-    for value in ('Screener', 'Remux', '3D', 'PAL', 'SECAM', 'NTSC', 'XXX'):
+    for value in ('Screener', 'Remux', 'PAL', 'SECAM', 'NTSC', 'XXX'):
        rebulk.string(value, value=value)
+    rebulk.string('3D', value='3D', tags='has-neighbor')

    rebulk.string('HQ', value='High Quality', tags='uhdbluray-neighbor')
    rebulk.string('HR', value='High Resolution')
@ -90,6 +97,7 @@ def other(config):  # pylint:disable=unused-argument,too-many-statements
    rebulk.string('mHD', 'HDLight', value='Micro HD')
    rebulk.string('LDTV', value='Low Definition')
    rebulk.string('HFR', value='High Frame Rate')
+    rebulk.string('VFR', value='Variable Frame Rate')
    rebulk.string('HD', value='HD', validator=None,
                  tags=['streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.regex('Full-?HD', 'FHD', value='Full HD', validator=None,
@ -128,13 +136,15 @@ def other(config):  # pylint:disable=unused-argument,too-many-statements
    rebulk.regex('BT-?2020', value='BT.2020', tags='uhdbluray-neighbor')

    rebulk.string('Sample', value='Sample', tags=['at-end', 'not-a-release-group'])
+    rebulk.string('Extras', value='Extras', tags='has-neighbor')
+    rebulk.regex('Digital-?Extras?', value='Extras')
    rebulk.string('Proof', value='Proof', tags=['at-end', 'not-a-release-group'])
    rebulk.string('Obfuscated', 'Scrambled', value='Obfuscated', tags=['at-end', 'not-a-release-group'])
    rebulk.string('xpost', 'postbot', 'asrequested', value='Repost', tags='not-a-release-group')

    rebulk.rules(RenameAnotherToOther, ValidateHasNeighbor, ValidateHasNeighborAfter, ValidateHasNeighborBefore,
                 ValidateScreenerRule, ValidateMuxRule, ValidateHardcodedSubs, ValidateStreamingServiceNeighbor,
-                 ValidateAtEnd, ProperCountRule)
+                 ValidateAtEnd, ValidateReal, ProperCountRule)

    return rebulk

@ -354,3 +364,20 @@ class ValidateAtEnd(Rule):
                    to_remove.append(match)

        return to_remove
+
+
+class ValidateReal(Rule):
+    """
+    Remove 'other' matches tagged 'real' that have no match before them in their path part.
+    """
+    priority = 64
+    consequence = RemoveMatch
+
+    def when(self, matches, context):
+        def is_real(candidate):
+            return candidate.name == 'other' and 'real' in candidate.tags
+
+        return [real
+                for filepart in matches.markers.named('path')
+                for real in matches.range(filepart.start, filepart.end, is_real)
+                if not matches.range(filepart.start, real.start)]
--- a/libs/common/guessit/rules/properties/part.py
+++ b/libs/common/guessit/rules/properties/part.py
@ -8,7 +8,7 @@ from rebulk.remodule import re
 from rebulk import Rebulk
 from ..common import dash
 from ..common.pattern import is_disabled
-from ..common.validators import seps_surround, int_coercable, compose
+from ..common.validators import seps_surround, int_coercable, and_
 from ..common.numeral import numeral, parse_numeral
 from ...reutils import build_or_pattern

@ -41,6 +41,6 @@ def part(config):  # pylint:disable=unused-argument

    rebulk.regex(build_or_pattern(prefixes) + r'-?(?P<part>' + numeral + r')',
                 prefixes=prefixes, validate_all=True, private_parent=True, children=True, formatter=parse_numeral,
-                 validator={'part': compose(validate_roman, lambda m: 0 < m.value < 100)})
+                 validator={'part': and_(validate_roman, lambda m: 0 < m.value < 100)})

    return rebulk
--- a/libs/common/guessit/rules/properties/release_group.py
+++ b/libs/common/guessit/rules/properties/release_group.py
@ -9,8 +9,8 @@ from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch
 from rebulk.match import Match

 from ..common import seps
-from ..common.expected import build_expected_function
 from ..common.comparators import marker_sorted
+from ..common.expected import build_expected_function
 from ..common.formatters import cleanup
 from ..common.pattern import is_disabled
 from ..common.validators import int_coercable, seps_surround
@ -50,7 +50,7 @@ def release_group(config):
            if string.lower().endswith(forbidden) and string[-len(forbidden) - 1:-len(forbidden)] in seps:
                string = string[:len(forbidden)]
                string = string.strip(groupname_seps)
-        return string
+        return string.strip()

    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'release_group'))

@ -72,7 +72,9 @@ _scene_previous_names = ('video_codec', 'source', 'video_api', 'audio_codec', 'a
                         'audio_channels', 'screen_size', 'other', 'container', 'language', 'subtitle_language',
                         'subtitle_language.suffix', 'subtitle_language.prefix', 'language.suffix')

-_scene_previous_tags = ('release-group-prefix', )
+_scene_previous_tags = ('release-group-prefix',)
+
+_scene_no_previous_tags = ('no-release-group-prefix',)


 class DashSeparatedReleaseGroup(Rule):
@ -193,7 +195,8 @@ class DashSeparatedReleaseGroup(Rule):

                if releasegroup.value:
                    to_append.append(releasegroup)
-                return to_remove, to_append
+                if to_remove or to_append:
+                    return to_remove, to_append


 class SceneReleaseGroup(Rule):
@ -212,6 +215,17 @@ class SceneReleaseGroup(Rule):
        super(SceneReleaseGroup, self).__init__()
        self.value_formatter = value_formatter

+    @staticmethod
+    def is_previous_match(match):
+        """
+        Tell whether the given match is allowed to precede a release_group.
+
+        :param match: match to check
+        :return: for a name listed in _scene_previous_names, the negation of
+            match.tagged(*_scene_no_previous_tags); otherwise match.tagged(*_scene_previous_tags)
+        """
+        if match.name in _scene_previous_names:
+            return not match.tagged(*_scene_no_previous_tags)
+        return match.tagged(*_scene_previous_tags)
+
    def when(self, matches, context):  # pylint:disable=too-many-locals
        # If a release_group is found before, ignore this kind of release_group rule.

@ -253,13 +267,12 @@ class SceneReleaseGroup(Rule):

                    if match.start < filepart.start:
                        return False
-                    return not match.private or match.name in _scene_previous_names
+                    return not match.private or self.is_previous_match(match)

                previous_match = matches.previous(last_hole,
                                                  previous_match_filter,
                                                  index=0)
-                if previous_match and (previous_match.name in _scene_previous_names or
-                                       any(tag in previous_match.tags for tag in _scene_previous_tags)) and \
+                if previous_match and (self.is_previous_match(previous_match)) and \
                        not matches.input_string[previous_match.end:last_hole.start].strip(seps) \
                        and not int_coercable(last_hole.value.strip(seps)):

@ -300,11 +313,11 @@ class AnimeReleaseGroup(Rule):

        # If a release_group is found before, ignore this kind of release_group rule.
        if matches.named('release_group'):
-            return to_remove, to_append
+            return False

        if not matches.named('episode') and not matches.named('season') and matches.named('release_group'):
            # This doesn't seems to be an anime, and we already found another release_group.
-            return to_remove, to_append
+            return False

        for filepart in marker_sorted(matches.markers.named('path'), matches):

@ -328,4 +341,7 @@ class AnimeReleaseGroup(Rule):
                to_append.append(group)
                to_remove.extend(matches.range(empty_group.start, empty_group.end,
                                               lambda m: 'weak-language' in m.tags))
-        return to_remove, to_append
+
+        if to_remove or to_append:
+            return to_remove, to_append
+        return False
--- a/libs/common/guessit/rules/properties/screen_size.py
+++ b/libs/common/guessit/rules/properties/screen_size.py
@ -24,8 +24,8 @@ def screen_size(config):
    :return: Created Rebulk object
    :rtype: Rebulk
    """
-    interlaced = frozenset({res for res in config['interlaced']})
-    progressive = frozenset({res for res in config['progressive']})
+    interlaced = frozenset(config['interlaced'])
+    progressive = frozenset(config['progressive'])
    frame_rates = [re.escape(rate) for rate in config['frame_rates']]
    min_ar = config['min_ar']
    max_ar = config['max_ar']
--- a/libs/common/guessit/rules/properties/source.py
+++ b/libs/common/guessit/rules/properties/source.py
@ -12,7 +12,7 @@ from rebulk import AppendMatch, Rebulk, RemoveMatch, Rule
 from .audio_codec import HqConflictRule
 from ..common import dash, seps
 from ..common.pattern import is_disabled
-from ..common.validators import seps_before, seps_after
+from ..common.validators import seps_before, seps_after, or_


 def source(config):  # pylint:disable=unused-argument
@ -26,7 +26,10 @@ def source(config):  # pylint:disable=unused-argument
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'source'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash], private_parent=True, children=True)
-    rebulk.defaults(name='source', tags=['video-codec-prefix', 'streaming_service.suffix'])
+    rebulk = rebulk.defaults(name='source',
+                             tags=['video-codec-prefix', 'streaming_service.suffix'],
+                             validate_all=True,
+                             validator={'__parent__': or_(seps_before, seps_after)})

    rip_prefix = '(?P<other>Rip)-?'
    rip_suffix = '-?(?P<other>Rip)'
@ -42,7 +45,7 @@ def source(config):  # pylint:disable=unused-argument

    def demote_other(match, other):  # pylint: disable=unused-argument
        """Default conflict solver with 'other' property."""
-        return other if other.name == 'other' else '__default__'
+        return other if other.name == 'other' or other.name == 'release_group' else '__default__'

    rebulk.regex(*build_source_pattern('VHS', suffix=rip_optional_suffix),
                 value={'source': 'VHS', 'other': 'Rip'})
@ -92,8 +95,9 @@ def source(config):  # pylint:disable=unused-argument
    # WEBCap is a synonym to WEBRip, mostly used by non english
    rebulk.regex(*build_source_pattern('WEB-?(?P<another>Cap)', suffix=rip_optional_suffix),
                 value={'source': 'Web', 'other': 'Rip', 'another': 'Rip'})
-    rebulk.regex(*build_source_pattern('WEB-?DL', 'WEB-?U?HD', 'WEB', 'DL-?WEB', 'DL(?=-?Mux)'),
+    rebulk.regex(*build_source_pattern('WEB-?DL', 'WEB-?U?HD', 'DL-?WEB', 'DL(?=-?Mux)'),
                 value={'source': 'Web'})
+    rebulk.regex('(WEB)', value='Web', tags='weak.source')

    rebulk.regex(*build_source_pattern('HD-?DVD', suffix=rip_optional_suffix),
                 value={'source': 'HD-DVD', 'other': 'Rip'})
@ -118,7 +122,7 @@ def source(config):  # pylint:disable=unused-argument
    rebulk.regex(*build_source_pattern('DSR?', 'SAT', suffix=rip_suffix),
                 value={'source': 'Satellite', 'other': 'Rip'})

-    rebulk.rules(ValidateSource, UltraHdBlurayRule)
+    rebulk.rules(ValidateSourcePrefixSuffix, ValidateWeakSource, UltraHdBlurayRule)

    return rebulk

@ -170,32 +174,62 @@ class UltraHdBlurayRule(Rule):
                to_remove.append(match)
                to_append.append(new_source)

-        return to_remove, to_append
+        if to_remove or to_append:
+            return to_remove, to_append
+        return False


-class ValidateSource(Rule):
+class ValidateSourcePrefixSuffix(Rule):
    """
-    Validate source with screener property, with video_codec property or separated
+    Validate source with source prefix, source suffix.
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        ret = []
-        for match in matches.named('source'):
-            match = match.initiator
-            if not seps_before(match) and \
-                    not matches.range(match.start - 1, match.start - 2,
-                                      lambda m: 'source-prefix' in m.tags):
-                if match.children:
-                    ret.extend(match.children)
-                ret.append(match)
-                continue
-            if not seps_after(match) and \
-                    not matches.range(match.end, match.end + 1,
-                                      lambda m: 'source-suffix' in m.tags):
-                if match.children:
-                    ret.extend(match.children)
-                ret.append(match)
-                continue
+        for filepart in matches.markers.named('path'):
+            for match in matches.range(filepart.start, filepart.end, predicate=lambda m: m.name == 'source'):
+                match = match.initiator
+                if not seps_before(match) and \
+                        not matches.range(match.start - 1, match.start - 2,
+                                          lambda m: 'source-prefix' in m.tags):
+                    if match.children:
+                        ret.extend(match.children)
+                    ret.append(match)
+                    continue
+                if not seps_after(match) and \
+                        not matches.range(match.end, match.end + 1,
+                                          lambda m: 'source-suffix' in m.tags):
+                    if match.children:
+                        ret.extend(match.children)
+                    ret.append(match)
+                    continue
+
+        return ret
+
+
+class ValidateWeakSource(Rule):
+    """
+    Remove a weak source that is followed by another source and preceded by text in its path part.
+    """
+    dependency = [ValidateSourcePrefixSuffix]
+    consequence = RemoveMatch
+    priority = 64
+
+    def when(self, matches, context):
+        def is_source(candidate):
+            return candidate.name == 'source'
+
+        to_remove = []
+        for filepart in matches.markers.named('path'):
+            for source in matches.range(filepart.start, filepart.end, predicate=is_source):
+                if 'weak.source' not in source.tags:
+                    continue
+                # another source comes later in this filepart and a non-separator hole comes earlier:
+                # the weak source is most likely part of the title
+                if matches.range(source.end, filepart.end, predicate=is_source) \
+                        and matches.holes(filepart.start, source.start,
+                                          predicate=lambda m: m.value.strip(seps), index=-1):
+                    if source.children:
+                        to_remove.extend(source.children)
+                    to_remove.append(source)
+
+        return to_remove
--- a/libs/common/guessit/rules/properties/streaming_service.py
+++ b/libs/common/guessit/rules/properties/streaming_service.py
@ -25,133 +25,13 @@ def streaming_service(config):  # pylint: disable=too-many-statements,unused-arg
    rebulk = rebulk.string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='streaming_service', tags=['source-prefix'])

-    rebulk.string('AE', 'A&E', value='A&E')
-    rebulk.string('AMBC', value='ABC')
-    rebulk.string('AUBC', value='ABC Australia')
-    rebulk.string('AJAZ', value='Al Jazeera English')
-    rebulk.string('AMC', value='AMC')
-    rebulk.string('AMZN', 'Amazon', value='Amazon Prime')
-    rebulk.regex('Amazon-?Prime', value='Amazon Prime')
-    rebulk.string('AS', value='Adult Swim')
-    rebulk.regex('Adult-?Swim', value='Adult Swim')
-    rebulk.string('ATK', value="America's Test Kitchen")
-    rebulk.string('ANPL', value='Animal Planet')
-    rebulk.string('ANLB', value='AnimeLab')
-    rebulk.string('AOL', value='AOL')
-    rebulk.string('ARD', value='ARD')
-    rebulk.string('iP', value='BBC iPlayer')
-    rebulk.regex('BBC-?iPlayer', value='BBC iPlayer')
-    rebulk.string('BRAV', value='BravoTV')
-    rebulk.string('CNLP', value='Canal+')
-    rebulk.string('CN', value='Cartoon Network')
-    rebulk.string('CBC', value='CBC')
-    rebulk.string('CBS', value='CBS')
-    rebulk.string('CNBC', value='CNBC')
-    rebulk.string('CC', value='Comedy Central')
-    rebulk.string('4OD', value='Channel 4')
-    rebulk.string('CHGD', value='CHRGD')
-    rebulk.string('CMAX', value='Cinemax')
-    rebulk.string('CMT', value='Country Music Television')
-    rebulk.regex('Comedy-?Central', value='Comedy Central')
-    rebulk.string('CCGC', value='Comedians in Cars Getting Coffee')
-    rebulk.string('CR', value='Crunchy Roll')
-    rebulk.string('CRKL', value='Crackle')
-    rebulk.regex('Crunchy-?Roll', value='Crunchy Roll')
-    rebulk.string('CSPN', value='CSpan')
-    rebulk.string('CTV', value='CTV')
-    rebulk.string('CUR', value='CuriosityStream')
-    rebulk.string('CWS', value='CWSeed')
-    rebulk.string('DSKI', value='Daisuki')
-    rebulk.string('DHF', value='Deadhouse Films')
-    rebulk.string('DDY', value='Digiturk Diledigin Yerde')
-    rebulk.string('DISC', 'Discovery', value='Discovery')
-    rebulk.string('DSNY', 'Disney', value='Disney')
-    rebulk.string('DIY', value='DIY Network')
-    rebulk.string('DOCC', value='Doc Club')
-    rebulk.string('DPLY', value='DPlay')
-    rebulk.string('ETV', value='E!')
-    rebulk.string('EPIX', value='ePix')
-    rebulk.string('ETTV', value='El Trece')
-    rebulk.string('ESPN', value='ESPN')
-    rebulk.string('ESQ', value='Esquire')
-    rebulk.string('FAM', value='Family')
-    rebulk.string('FJR', value='Family Jr')
-    rebulk.string('FOOD', value='Food Network')
-    rebulk.string('FOX', value='Fox')
-    rebulk.string('FREE', value='Freeform')
-    rebulk.string('FYI', value='FYI Network')
-    rebulk.string('GLBL', value='Global')
-    rebulk.string('GLOB', value='GloboSat Play')
-    rebulk.string('HLMK', value='Hallmark')
-    rebulk.string('HBO', value='HBO Go')
-    rebulk.regex('HBO-?Go', value='HBO Go')
-    rebulk.string('HGTV', value='HGTV')
-    rebulk.string('HIST', 'History', value='History')
-    rebulk.string('HULU', value='Hulu')
-    rebulk.string('ID', value='Investigation Discovery')
-    rebulk.string('IFC', value='IFC')
-    rebulk.string('iTunes', 'iT', value='iTunes')
-    rebulk.string('ITV', value='ITV')
-    rebulk.string('KNOW', value='Knowledge Network')
-    rebulk.string('LIFE', value='Lifetime')
-    rebulk.string('MTOD', value='Motor Trend OnDemand')
-    rebulk.string('MNBC', value='MSNBC')
-    rebulk.string('MTV', value='MTV')
-    rebulk.string('NATG', value='National Geographic')
-    rebulk.regex('National-?Geographic', value='National Geographic')
-    rebulk.string('NBA', value='NBA TV')
-    rebulk.regex('NBA-?TV', value='NBA TV')
-    rebulk.string('NBC', value='NBC')
-    rebulk.string('NF', 'Netflix', value='Netflix')
-    rebulk.string('NFL', value='NFL')
-    rebulk.string('NFLN', value='NFL Now')
-    rebulk.string('GC', value='NHL GameCenter')
-    rebulk.string('NICK', 'Nickelodeon', value='Nickelodeon')
-    rebulk.string('NRK', value='Norsk Rikskringkasting')
-    rebulk.string('PBS', value='PBS')
-    rebulk.string('PBSK', value='PBS Kids')
-    rebulk.string('PSN', value='Playstation Network')
-    rebulk.string('PLUZ', value='Pluzz')
-    rebulk.string('RTE', value='RTE One')
-    rebulk.string('SBS', value='SBS (AU)')
-    rebulk.string('SESO', 'SeeSo', value='SeeSo')
-    rebulk.string('SHMI', value='Shomi')
-    rebulk.string('SPIK', value='Spike')
-    rebulk.string('SPKE', value='Spike TV')
-    rebulk.regex('Spike-?TV', value='Spike TV')
-    rebulk.string('SNET', value='Sportsnet')
-    rebulk.string('SPRT', value='Sprout')
-    rebulk.string('STAN', value='Stan')
-    rebulk.string('STZ', value='Starz')
-    rebulk.string('SVT', value='Sveriges Television')
-    rebulk.string('SWER', value='SwearNet')
-    rebulk.string('SYFY', value='Syfy')
-    rebulk.string('TBS', value='TBS')
-    rebulk.string('TFOU', value='TFou')
-    rebulk.string('CW', value='The CW')
-    rebulk.regex('The-?CW', value='The CW')
-    rebulk.string('TLC', value='TLC')
-    rebulk.string('TUBI', value='TubiTV')
-    rebulk.string('TV3', value='TV3 Ireland')
-    rebulk.string('TV4', value='TV4 Sweeden')
-    rebulk.string('TVL', value='TV Land')
-    rebulk.regex('TV-?Land', value='TV Land')
-    rebulk.string('UFC', value='UFC')
-    rebulk.string('UKTV', value='UKTV')
-    rebulk.string('UNIV', value='Univision')
-    rebulk.string('USAN', value='USA Network')
-    rebulk.string('VLCT', value='Velocity')
-    rebulk.string('VH1', value='VH1')
-    rebulk.string('VICE', value='Viceland')
-    rebulk.string('VMEO', value='Vimeo')
-    rebulk.string('VRV', value='VRV')
-    rebulk.string('WNET', value='W Network')
-    rebulk.string('WME', value='WatchMe')
-    rebulk.string('WWEN', value='WWE Network')
-    rebulk.string('XBOX', value='Xbox Video')
-    rebulk.string('YHOO', value='Yahoo')
-    rebulk.string('RED', value='YouTube Red')
-    rebulk.string('ZDF', value='ZDF')
+    # Register every configured pattern under its service name (the config key).
+    # A value may be a single pattern or a list; entries starting with 're:' are regular expressions.
+    for value, items in config.items():
+        patterns = items if isinstance(items, list) else [items]
+        for pattern in patterns:
+            if pattern.startswith('re:'):
+                # 're:' only flags the entry as a regex; strip it so it is not matched literally
+                rebulk.regex(pattern[3:], value=value)
+            else:
+                rebulk.string(pattern, value=value)

    rebulk.rules(ValidateStreamingService)

@ -161,7 +41,7 @@ def streaming_service(config):  # pylint: disable=too-many-statements,unused-arg
 class ValidateStreamingService(Rule):
    """Validate streaming service matches."""

-    priority = 32
+    priority = 128
    consequence = RemoveMatch

    def when(self, matches, context):
--- a/libs/common/guessit/rules/properties/title.py
+++ b/libs/common/guessit/rules/properties/title.py
@ -8,7 +8,12 @@ from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, AppendTags
 from rebulk.formatters import formatters

 from .film import FilmTitleRule
-from .language import SubtitlePrefixLanguageRule, SubtitleSuffixLanguageRule, SubtitleExtensionRule
+from .language import (
+    SubtitlePrefixLanguageRule,
+    SubtitleSuffixLanguageRule,
+    SubtitleExtensionRule,
+    NON_SPECIFIC_LANGUAGES
+)
 from ..common import seps, title_seps
 from ..common.comparators import marker_sorted
 from ..common.expected import build_expected_function
@ -88,12 +93,19 @@ class TitleBaseRule(Rule):
        :rtype:
        """
        cropped_holes = []
+        # Group markers whose span equals the span of their path marker are not used for cropping.
+        group_markers = matches.markers.named('group')
+        # Iterate over a snapshot: removing from the list being iterated would skip the next marker.
+        for group_marker in list(group_markers):
+            path_marker = matches.markers.at_match(group_marker, predicate=lambda m: m.name == 'path', index=0)
+            if path_marker and path_marker.span == group_marker.span:
+                group_markers.remove(group_marker)
+
        for hole in holes:
-            group_markers = matches.markers.named('group')
            cropped_holes.extend(hole.crop(group_markers))
+
        return cropped_holes

-    def is_ignored(self, match):
+    @staticmethod
+    def is_ignored(match):
        """
        Ignore matches when scanning for title (hole).

@ -130,7 +142,8 @@ class TitleBaseRule(Rule):
            for outside in outside_matches:
                other_languages.extend(matches.range(outside.start, outside.end,
                                                     lambda c_match: c_match.name == match.name and
-                                                     c_match not in to_keep))
+                                                     c_match not in to_keep and
+                                                     c_match.value not in NON_SPECIFIC_LANGUAGES))

            if not other_languages and (not starting or len(match.raw) <= 3):
                return True
@ -239,7 +252,7 @@ class TitleBaseRule(Rule):
        to_remove = []

        if matches.named(self.match_name, lambda match: 'expected' in match.tags):
-            return ret, to_remove
+            return False

        fileparts = [filepart for filepart in list(marker_sorted(matches.markers.named('path'), matches))
                     if not self.filepart_filter or self.filepart_filter(filepart, matches)]
@ -272,7 +285,9 @@ class TitleBaseRule(Rule):
                ret.extend(titles)
                to_remove.extend(to_remove_c)

-        return ret, to_remove
+        if ret or to_remove:
+            return ret, to_remove
+        return False


 class TitleFromPosition(TitleBaseRule):
@ -329,4 +344,6 @@ class PreferTitleWithYear(Rule):
        for title_match in titles:
            if title_match.value not in title_values:
                to_remove.append(title_match)
-        return to_remove, to_tag
+        if to_remove or to_tag:
+            return to_remove, to_tag
+        return False
--- a/libs/common/guessit/rules/properties/video_codec.py
+++ b/libs/common/guessit/rules/properties/video_codec.py
@ -3,9 +3,8 @@
 """
 video_codec and video_profile property
 """
-from rebulk.remodule import re
-
 from rebulk import Rebulk, Rule, RemoveMatch
+from rebulk.remodule import re

 from ..common import dash
 from ..common.pattern import is_disabled
@ -43,7 +42,8 @@ def video_codec(config):  # pylint:disable=unused-argument

    # http://blog.mediacoderhq.com/h264-profiles-and-levels/
    # https://en.wikipedia.org/wiki/H.264/MPEG-4_AVC
-    rebulk.defaults(name="video_profile",
+    rebulk.defaults(clear=True,
+                    name="video_profile",
                    validator=seps_surround,
                    disabled=lambda context: is_disabled(context, 'video_profile'))

@ -66,7 +66,8 @@ def video_codec(config):  # pylint:disable=unused-argument
    rebulk.string('DXVA', value='DXVA', name='video_api',
                  disabled=lambda context: is_disabled(context, 'video_api'))

-    rebulk.defaults(name='color_depth',
+    rebulk.defaults(clear=True,
+                    name='color_depth',
                    validator=seps_surround,
                    disabled=lambda context: is_disabled(context, 'color_depth'))
    rebulk.regex('12.?bits?', value='12-bit')
--- a/libs/common/guessit/rules/properties/website.py
+++ b/libs/common/guessit/rules/properties/website.py
@ -67,7 +67,7 @@ def website(config):
            """
            Validator for next website matches
            """
-            return any(name in ['season', 'episode', 'year'] for name in match.names)
+            return match.named('season', 'episode', 'year')

        def when(self, matches, context):
            to_remove = []
@ -80,7 +80,9 @@ def website(config):
                if not safe:
                    suffix = matches.next(website_match, PreferTitleOverWebsite.valid_followers, 0)
                    if suffix:
-                        to_remove.append(website_match)
+                        group = matches.markers.at_match(website_match, lambda marker: marker.name == 'group', 0)
+                        if not group:
+                            to_remove.append(website_match)
            return to_remove

    rebulk.rules(PreferTitleOverWebsite, ValidateWebsitePrefix)