Mirror of https://github.com/clinton-hall/nzbToMedia.git (synced 2025-08-21 05:43:16 -07:00)

Commit 1f4bd41bcc — Move common libs to libs/common
Parent: 8dbb1a2451
1612 changed files with 962 additions and 10 deletions
libs/common/guessit/rules/__init__.py (new file, 99 lines)
@@ -0,0 +1,99 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Rebulk object default builder
"""
from rebulk import Rebulk

from .markers.path import path
from .markers.groups import groups

from .properties.episodes import episodes
from .properties.container import container
from .properties.source import source
from .properties.video_codec import video_codec
from .properties.audio_codec import audio_codec
from .properties.screen_size import screen_size
from .properties.website import website
from .properties.date import date
from .properties.title import title
from .properties.episode_title import episode_title
from .properties.language import language
from .properties.country import country
from .properties.release_group import release_group
from .properties.streaming_service import streaming_service
from .properties.other import other
from .properties.size import size
from .properties.bit_rate import bit_rate
from .properties.edition import edition
from .properties.cds import cds
from .properties.bonus import bonus
from .properties.film import film
from .properties.part import part
from .properties.crc import crc
from .properties.mimetype import mimetype
from .properties.type import type_

from .processors import processors


def rebulk_builder(config):
    """
    Default builder for the main Rebulk object used by the api.
    :return: Main Rebulk object
    :rtype: Rebulk
    """
    def _config(name):
        return config.get(name, {})

    rebulk = Rebulk()

    common_words = frozenset(_config('common_words'))

    rebulk.rebulk(path(_config('path')))
    rebulk.rebulk(groups(_config('groups')))

    rebulk.rebulk(episodes(_config('episodes')))
    rebulk.rebulk(container(_config('container')))
    rebulk.rebulk(source(_config('source')))
    rebulk.rebulk(video_codec(_config('video_codec')))
    rebulk.rebulk(audio_codec(_config('audio_codec')))
    rebulk.rebulk(screen_size(_config('screen_size')))
    rebulk.rebulk(website(_config('website')))
    rebulk.rebulk(date(_config('date')))
    rebulk.rebulk(title(_config('title')))
    rebulk.rebulk(episode_title(_config('episode_title')))
    rebulk.rebulk(language(_config('language'), common_words))
    rebulk.rebulk(country(_config('country'), common_words))
    rebulk.rebulk(release_group(_config('release_group')))
    rebulk.rebulk(streaming_service(_config('streaming_service')))
    rebulk.rebulk(other(_config('other')))
    rebulk.rebulk(size(_config('size')))
    rebulk.rebulk(bit_rate(_config('bit_rate')))
    rebulk.rebulk(edition(_config('edition')))
    rebulk.rebulk(cds(_config('cds')))
    rebulk.rebulk(bonus(_config('bonus')))
    rebulk.rebulk(film(_config('film')))
    rebulk.rebulk(part(_config('part')))
    rebulk.rebulk(crc(_config('crc')))

    rebulk.rebulk(processors(_config('processors')))

    rebulk.rebulk(mimetype(_config('mimetype')))
    rebulk.rebulk(type_(_config('type')))

    def customize_properties(properties):
        """
        Customize default rebulk properties
        """
        count = properties['count']
        del properties['count']

        properties['season_count'] = count
        properties['episode_count'] = count

        return properties

    rebulk.customize_properties = customize_properties

    return rebulk
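Usage sketch, not part of the commit: this builder is wired into guessit's public API, so the rules above are normally exercised through guessit() rather than by calling rebulk_builder directly with a hand-rolled config. Assuming the guessit package (which bundles the advanced configuration these rules expect) is installed:

from guessit import guessit

# Matches the filename against every rule registered by rebulk_builder.
result = guessit('Show.Name.S01E03.720p.BluRay.x264-GRP')
print(result)  # expected to include 'title', 'season', 'episode', 'screen_size'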
libs/common/guessit/rules/common/__init__.py (new file, 15 lines)
@@ -0,0 +1,15 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Common module
"""
import re

seps = r' [](){}+*|=-_~#/\\.,;:'  # list of tags/words separators
seps_no_groups = seps.replace('[](){}', '')
seps_no_fs = seps.replace('/', '').replace('\\', '')

title_seps = r'-+/\|'  # separators for title

dash = (r'-', r'['+re.escape(seps_no_fs)+']')  # abbreviation used by many rebulk objects.
alt_dash = (r'@', r'['+re.escape(seps_no_fs)+']')  # abbreviation used by many rebulk objects.
libs/common/guessit/rules/common/comparators.py (new file, 75 lines)
@@ -0,0 +1,75 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Comparators
"""
try:
    from functools import cmp_to_key
except ImportError:
    from ...backports import cmp_to_key


def marker_comparator_predicate(match):
    """
    Match predicate used in comparator
    """
    return (
        not match.private
        and match.name not in ('proper_count', 'title')
        and not (match.name == 'container' and 'extension' in match.tags)
        and not (match.name == 'other' and match.value == 'Rip')
    )


def marker_weight(matches, marker, predicate):
    """
    Compute the comparator weight of a marker
    :param matches:
    :param marker:
    :param predicate:
    :return:
    """
    return len(set(match.name for match in matches.range(*marker.span, predicate=predicate)))


def marker_comparator(matches, markers, predicate):
    """
    Builds a comparator that sorts markers from the most valuable to the least.

    Prefers the parts with the higher distinct-match count; ties are broken by preferring the rightmost part.

    :param matches:
    :type matches:
    :param markers:
    :param predicate:
    :return:
    :rtype:
    """

    def comparator(marker1, marker2):
        """
        The actual comparator function.
        """
        matches_count = marker_weight(matches, marker2, predicate) - marker_weight(matches, marker1, predicate)
        if matches_count:
            return matches_count

        # give preference to the rightmost path
        return markers.index(marker2) - markers.index(marker1)

    return comparator


def marker_sorted(markers, matches, predicate=marker_comparator_predicate):
    """
    Sort markers from matches, from the most valuable to the least.

    :param markers:
    :type markers:
    :param matches:
    :type matches:
    :param predicate:
    :return:
    :rtype:
    """
    return sorted(markers, key=cmp_to_key(marker_comparator(matches, markers, predicate=predicate)))
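Standalone sketch, not part of the commit, of the comparator-to-key pattern used by marker_sorted: a comparator returning a negative/zero/positive number is adapted for sorted() with functools.cmp_to_key. The weights dict here is illustrative, standing in for marker_weight:

from functools import cmp_to_key

weights = {'part1': 3, 'part2': 5, 'part3': 5}
order = ['part1', 'part2', 'part3']

def comparator(a, b):
    # Higher weight first; ties broken by preferring the later (rightmost) item.
    diff = weights[b] - weights[a]
    if diff:
        return diff
    return order.index(b) - order.index(a)

print(sorted(order, key=cmp_to_key(comparator)))  # ['part3', 'part2', 'part1']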
libs/common/guessit/rules/common/date.py (new file, 125 lines)
@@ -0,0 +1,125 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Date
"""
from dateutil import parser

from rebulk.remodule import re

_dsep = r'[-/ \.]'
_dsep_bis = r'[-/ \.x]'

date_regexps = [
    re.compile(r'%s((\d{8}))%s' % (_dsep, _dsep), re.IGNORECASE),
    re.compile(r'%s((\d{6}))%s' % (_dsep, _dsep), re.IGNORECASE),
    re.compile(r'(?:^|[^\d])((\d{2})%s(\d{1,2})%s(\d{1,2}))(?:$|[^\d])' % (_dsep, _dsep), re.IGNORECASE),
    re.compile(r'(?:^|[^\d])((\d{1,2})%s(\d{1,2})%s(\d{2}))(?:$|[^\d])' % (_dsep, _dsep), re.IGNORECASE),
    re.compile(r'(?:^|[^\d])((\d{4})%s(\d{1,2})%s(\d{1,2}))(?:$|[^\d])' % (_dsep_bis, _dsep), re.IGNORECASE),
    re.compile(r'(?:^|[^\d])((\d{1,2})%s(\d{1,2})%s(\d{4}))(?:$|[^\d])' % (_dsep, _dsep_bis), re.IGNORECASE),
    re.compile(r'(?:^|[^\d])((\d{1,2}(?:st|nd|rd|th)?%s(?:[a-z]{3,10})%s\d{4}))(?:$|[^\d])' % (_dsep, _dsep),
               re.IGNORECASE)]


def valid_year(year):
    """Check if number is a valid year"""
    return 1920 <= year < 2030


def _is_int(string):
    """
    Check if the input string is an integer

    :param string:
    :type string:
    :return:
    :rtype:
    """
    try:
        int(string)
        return True
    except ValueError:
        return False


def _guess_day_first_parameter(groups):  # pylint:disable=inconsistent-return-statements
    """
    If day_first is not defined, use some heuristics to guess it.
    This helps to solve issues introduced by the dateutil 2.5.3 parser changes.

    :param groups: match groups found for the date
    :type groups: list of match objects
    :return: day_first option guessed value
    :rtype: bool
    """

    # If the match starts with a long year, then day_first is forced to False.
    if _is_int(groups[0]) and valid_year(int(groups[0][:4])):
        return False
    # If the match ends with a long year, then day_first is forced to True.
    if _is_int(groups[-1]) and valid_year(int(groups[-1][-4:])):
        return True
    # If the match starts with a short year, then day_first is forced to False.
    if _is_int(groups[0]) and int(groups[0][:2]) > 31:
        return False
    # If the match ends with a short year, then day_first is forced to True.
    if _is_int(groups[-1]) and int(groups[-1][-2:]) > 31:
        return True


def search_date(string, year_first=None, day_first=None):  # pylint:disable=inconsistent-return-statements
    """Looks for date patterns and, if found, returns the date and group span.

    Assumes there are sentinels at the beginning and end of the string that
    always allow matching a non-digit delimiting the date.

    The year may be given with only two digits; in that case the nearest
    possible date to today is returned.

    >>> search_date(' This happened on 2002-04-22. ')
    (18, 28, datetime.date(2002, 4, 22))

    >>> search_date(' And this on 17-06-1998. ')
    (13, 23, datetime.date(1998, 6, 17))

    >>> search_date(' no date in here ')
    """
    for date_re in date_regexps:
        search_match = date_re.search(string)
        if not search_match:
            continue

        start, end = search_match.start(1), search_match.end(1)
        groups = search_match.groups()[1:]
        match = '-'.join(groups)

        if match is None:
            continue

        if year_first and day_first is None:
            day_first = False

        if day_first is None:
            day_first = _guess_day_first_parameter(groups)

        # If day_first/year_first is undefined, parsing is attempted with both possible values.
        yearfirst_opts = [False, True]
        if year_first is not None:
            yearfirst_opts = [year_first]

        dayfirst_opts = [True, False]
        if day_first is not None:
            dayfirst_opts = [day_first]

        kwargs_list = ({'dayfirst': d, 'yearfirst': y}
                       for d in dayfirst_opts for y in yearfirst_opts)
        for kwargs in kwargs_list:
            try:
                date = parser.parse(match, **kwargs)
            except (ValueError, TypeError):  # pragma: no cover
                # see https://bugs.launchpad.net/dateutil/+bug/1247643
                date = None

            # check date plausibility
            if date and valid_year(date.year):  # pylint:disable=no-member
                return start, end, date.date()  # pylint:disable=no-member
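Quick illustration of the day_first/year_first handling, not part of the commit (assumes python-dateutil is installed and the guessit package is importable):

from guessit.rules.common.date import search_date

# A trailing 4-digit year makes _guess_day_first_parameter force day_first.
print(search_date(' 01-02-2003 '))  # (1, 11, datetime.date(2003, 2, 1))
# An explicit year_first overrides the heuristics.
print(search_date(' 01-02-03 ', year_first=True))  # (1, 9, datetime.date(2001, 2, 3))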
libs/common/guessit/rules/common/expected.py (new file, 53 lines)
@@ -0,0 +1,53 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Expected property factory
"""
import re

from rebulk import Rebulk
from rebulk.utils import find_all

from . import dash, seps


def build_expected_function(context_key):
    """
    Creates an expected property function

    :param context_key:
    :type context_key:
    :return:
    :rtype:
    """

    def expected(input_string, context):
        """
        Expected property functional pattern.
        :param input_string:
        :type input_string:
        :param context:
        :type context:
        :return:
        :rtype:
        """
        ret = []
        for search in context.get(context_key):
            if search.startswith('re:'):
                search = search[3:]
                search = search.replace(' ', '-')
                matches = Rebulk().regex(search, abbreviations=[dash], flags=re.IGNORECASE) \
                    .matches(input_string, context)
                for match in matches:
                    ret.append(match.span)
            else:
                value = search
                for sep in seps:
                    input_string = input_string.replace(sep, ' ')
                    search = search.replace(sep, ' ')
                for start in find_all(input_string, search, ignore_case=True):
                    ret.append({'start': start, 'end': start + len(search), 'value': value})
        return ret

    return expected
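Hedged usage sketch, not part of the commit: guessit's API is understood to feed context keys such as 'expected_title' to this factory. The literal (non-'re:') branch normalizes separators and returns span dicts:

from guessit.rules.common.expected import build_expected_function

expected_title = build_expected_function('expected_title')
print(expected_title('The.100.S01E01.720p.mkv', {'expected_title': ['The 100']}))
# [{'start': 0, 'end': 7, 'value': 'The 100'}]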
libs/common/guessit/rules/common/formatters.py (new file, 136 lines)
@@ -0,0 +1,136 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Formatters
"""
from rebulk.formatters import formatters
from rebulk.remodule import re
from . import seps

_excluded_clean_chars = ',:;-/\\'
clean_chars = ""
for sep in seps:
    if sep not in _excluded_clean_chars:
        clean_chars += sep


def _potential_before(i, input_string):
    """
    Check if the character at position i can be a potential single char separator, considering what's before it.

    :param i:
    :type i: int
    :param input_string:
    :type input_string: str
    :return:
    :rtype: bool
    """
    return i - 2 >= 0 and input_string[i] in seps and input_string[i - 2] in seps and input_string[i - 1] not in seps


def _potential_after(i, input_string):
    """
    Check if the character at position i can be a potential single char separator, considering what's after it.

    :param i:
    :type i: int
    :param input_string:
    :type input_string: str
    :return:
    :rtype: bool
    """
    return i + 2 >= len(input_string) or \
        input_string[i + 2] == input_string[i] and input_string[i + 1] not in seps


def cleanup(input_string):
    """
    Removes and strips separators from input_string (but keeps ',;' characters).

    It also keeps separators that delimit single characters (Marvel's Agents of S.H.I.E.L.D.).

    :param input_string:
    :type input_string: str
    :return:
    :rtype:
    """
    clean_string = input_string
    for char in clean_chars:
        clean_string = clean_string.replace(char, ' ')

    # Restore input separators if they separate single characters.
    # Useful for Marvel's Agents of S.H.I.E.L.D.
    # https://github.com/guessit-io/guessit/issues/278

    indices = [i for i, letter in enumerate(clean_string) if letter in seps]

    dots = set()
    if indices:
        clean_list = list(clean_string)

        potential_indices = []

        for i in indices:
            if _potential_before(i, input_string) and _potential_after(i, input_string):
                potential_indices.append(i)

        replace_indices = []

        for potential_index in potential_indices:
            if potential_index - 2 in potential_indices or potential_index + 2 in potential_indices:
                replace_indices.append(potential_index)

        if replace_indices:
            for replace_index in replace_indices:
                dots.add(input_string[replace_index])
                clean_list[replace_index] = input_string[replace_index]
            clean_string = ''.join(clean_list)

    clean_string = strip(clean_string, ''.join([c for c in seps if c not in dots]))

    clean_string = re.sub(' +', ' ', clean_string)
    return clean_string


def strip(input_string, chars=seps):
    """
    Strip separators from input_string
    :param input_string:
    :param chars:
    :type input_string:
    :return:
    :rtype:
    """
    return input_string.strip(chars)


def raw_cleanup(raw):
    """
    Cleanup a raw value to perform raw comparison
    :param raw:
    :type raw:
    :return:
    :rtype:
    """
    return formatters(cleanup, strip)(raw.lower())


def reorder_title(title, articles=('the',), separators=(',', ', ')):
    """
    Reorder the title
    :param title:
    :type title:
    :param articles:
    :type articles:
    :param separators:
    :type separators:
    :return:
    :rtype:
    """
    ltitle = title.lower()
    for article in articles:
        for separator in separators:
            suffix = separator + article
            if ltitle[-len(suffix):] == suffix:
                return title[-len(suffix) + len(separator):] + ' ' + title[:-len(suffix)]
    return title
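Behavior check against the code above, not part of the commit; the single-character-separator logic is what keeps acronyms intact, per the guessit issue linked in the comments:

from guessit.rules.common.formatters import cleanup, reorder_title

print(cleanup('Marvels.Agents.of.S.H.I.E.L.D.'))  # 'Marvels Agents of S.H.I.E.L.D.'
print(reorder_title('Simpsons, The'))             # 'The Simpsons'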
libs/common/guessit/rules/common/numeral.py (new file, 165 lines)
@@ -0,0 +1,165 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Parse numerals from various formats
"""
from rebulk.remodule import re

digital_numeral = r'\d{1,4}'

roman_numeral = r'(?=[MCDLXVI]+)M{0,4}(?:CM|CD|D?C{0,3})(?:XC|XL|L?X{0,3})(?:IX|IV|V?I{0,3})'

english_word_numeral_list = [
    'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten',
    'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen', 'twenty'
]

french_word_numeral_list = [
    'zéro', 'un', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
    'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dix-sept', 'dix-huit', 'dix-neuf', 'vingt'
]

french_alt_word_numeral_list = [
    'zero', 'une', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
    'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dixsept', 'dixhuit', 'dixneuf', 'vingt'
]


def __build_word_numeral(*args):
    """
    Build a word numeral regexp from word lists.

    :param args:
    :type args:
    :return:
    :rtype:
    """
    re_ = None
    for word_list in args:
        for word in word_list:
            if not re_:
                re_ = r'(?:(?=\w+)'
            else:
                re_ += '|'
            re_ += word
    re_ += ')'
    return re_


word_numeral = __build_word_numeral(english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list)

numeral = '(?:' + digital_numeral + '|' + roman_numeral + '|' + word_numeral + ')'

__romanNumeralMap = (
    ('M', 1000),
    ('CM', 900),
    ('D', 500),
    ('CD', 400),
    ('C', 100),
    ('XC', 90),
    ('L', 50),
    ('XL', 40),
    ('X', 10),
    ('IX', 9),
    ('V', 5),
    ('IV', 4),
    ('I', 1)
)

__romanNumeralPattern = re.compile('^' + roman_numeral + '$')


def __parse_roman(value):
    """
    Convert a Roman numeral to an integer.

    :param value: Value to parse
    :type value: string
    :return:
    :rtype:
    """
    if not __romanNumeralPattern.search(value):
        raise ValueError('Invalid Roman numeral: %s' % value)

    result = 0
    index = 0
    for num, integer in __romanNumeralMap:
        while value[index:index + len(num)] == num:
            result += integer
            index += len(num)
    return result


def __parse_word(value):
    """
    Convert a word numeral to an integer.

    :param value: Value to parse
    :type value: string
    :return:
    :rtype:
    """
    for word_list in [english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list]:
        try:
            return word_list.index(value.lower())
        except ValueError:
            pass
    raise ValueError  # pragma: no cover


_clean_re = re.compile(r'[^\d]*(\d+)[^\d]*')


def parse_numeral(value, int_enabled=True, roman_enabled=True, word_enabled=True, clean=True):
    """
    Parse a numeric value into an integer.

    :param value: Value to parse. Can be an integer, a Roman numeral or a word.
    :type value: string
    :param int_enabled:
    :type int_enabled:
    :param roman_enabled:
    :type roman_enabled:
    :param word_enabled:
    :type word_enabled:
    :param clean:
    :type clean:
    :return: Numeric value, or None if the value can't be parsed
    :rtype: int
    """
    # pylint: disable=too-many-branches
    if int_enabled:
        try:
            if clean:
                match = _clean_re.match(value)
                if match:
                    clean_value = match.group(1)
                    return int(clean_value)
            return int(value)
        except ValueError:
            pass
    if roman_enabled:
        try:
            if clean:
                for word in value.split():
                    try:
                        return __parse_roman(word.upper())
                    except ValueError:
                        pass
            return __parse_roman(value)
        except ValueError:
            pass
    if word_enabled:
        try:
            if clean:
                for word in value.split():
                    try:
                        return __parse_word(word)
                    except ValueError:  # pragma: no cover
                        pass
            return __parse_word(value)  # pragma: no cover
        except ValueError:  # pragma: no cover
            pass
    raise ValueError('Invalid numeral: ' + value)  # pragma: no cover
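Behavior check, not part of the commit: parse_numeral accepts digits, Roman numerals and English/French number words:

from guessit.rules.common.numeral import parse_numeral

print(parse_numeral('3'))       # 3
print(parse_numeral('XIV'))     # 14 (Roman)
print(parse_numeral('twenty'))  # 20 (word list index)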
libs/common/guessit/rules/common/pattern.py (new file, 27 lines)
@@ -0,0 +1,27 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Pattern utility functions
"""


def is_disabled(context, name):
    """Whether a specific pattern is disabled.

    The context object might define an inclusion list (includes) or an exclusion list (excludes).
    A pattern is considered disabled if it appears in the exclusion list, or
    if an inclusion list is defined and the pattern does not appear in it.

    :param context:
    :param name:
    :return:
    """
    if not context:
        return False

    excludes = context.get('excludes')
    if excludes and name in excludes:
        return True

    includes = context.get('includes')
    return includes and name not in includes
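Behavior check, not part of the commit:

from guessit.rules.common.pattern import is_disabled

print(is_disabled({'excludes': ['country']}, 'country'))  # True (excluded)
print(is_disabled({'includes': ['title']}, 'country'))    # True (inclusion list defined, name absent)
print(is_disabled(None, 'country'))                       # False (no context)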
libs/common/guessit/rules/common/quantity.py (new file, 106 lines)
@@ -0,0 +1,106 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Quantities: Size, BitRate, FrameRate
"""
import re
from abc import abstractmethod

import six

from ..common import seps


class Quantity(object):
    """
    Represent a quantity object with magnitude and units.
    """

    parser_re = re.compile(r'(?P<magnitude>\d+(?:[.]\d+)?)(?P<units>[^\d]+)?')

    def __init__(self, magnitude, units):
        self.magnitude = magnitude
        self.units = units

    @classmethod
    @abstractmethod
    def parse_units(cls, value):
        """
        Parse a string to a proper unit notation.
        """
        raise NotImplementedError

    @classmethod
    def fromstring(cls, string):
        """
        Parse the string into a quantity object.
        :param string:
        :return:
        """
        values = cls.parser_re.match(string).groupdict()
        try:
            magnitude = int(values['magnitude'])
        except ValueError:
            magnitude = float(values['magnitude'])
        units = cls.parse_units(values['units'])

        return cls(magnitude, units)

    def __hash__(self):
        return hash(str(self))

    def __eq__(self, other):
        if isinstance(other, six.string_types):
            return str(self) == other
        if not isinstance(other, self.__class__):
            return NotImplemented
        return self.magnitude == other.magnitude and self.units == other.units

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        return '<{0} [{1}]>'.format(self.__class__.__name__, self)

    def __str__(self):
        return '{0}{1}'.format(self.magnitude, self.units)


class Size(Quantity):
    """
    Represent size.

    e.g.: 1.1GB, 300MB
    """

    @classmethod
    def parse_units(cls, value):
        return value.strip(seps).upper()


class BitRate(Quantity):
    """
    Represent bit rate.

    e.g.: 320Kbps, 1.5Mbps
    """

    @classmethod
    def parse_units(cls, value):
        value = value.strip(seps).capitalize()
        for token in ('bits', 'bit'):
            value = value.replace(token, 'bps')

        return value


class FrameRate(Quantity):
    """
    Represent frame rate.

    e.g.: 24fps, 60fps
    """

    @classmethod
    def parse_units(cls, value):
        return 'fps'
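Behavior check, not part of the commit: fromstring() picks an int or float magnitude and delegates unit normalization to each subclass:

from guessit.rules.common.quantity import Size, BitRate

size = Size.fromstring('1.1GB')
print(size.magnitude, size.units)     # 1.1 GB
print(BitRate.fromstring('320kbps'))  # 320Kbps (units capitalized)
print(size == '1.1GB')                # True (string comparison path of __eq__)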
libs/common/guessit/rules/common/validators.py (new file, 51 lines)
@@ -0,0 +1,51 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Validators
"""
from functools import partial

from rebulk.validators import chars_before, chars_after, chars_surround
from . import seps

seps_before = partial(chars_before, seps)
seps_after = partial(chars_after, seps)
seps_surround = partial(chars_surround, seps)


def int_coercable(string):
    """
    Check if the string can be coerced to int
    :param string:
    :type string:
    :return:
    :rtype:
    """
    try:
        int(string)
        return True
    except ValueError:
        return False


def compose(*validators):
    """
    Compose validator functions
    :param validators:
    :type validators:
    :return:
    :rtype:
    """
    def composed(string):
        """
        Composed validators function
        :param string:
        :type string:
        :return:
        :rtype:
        """
        for validator in validators:
            if not validator(string):
                return False
        return True
    return composed
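Usage sketch, not part of the commit: compose() chains predicate-style validators; the year-like check below is illustrative:

from guessit.rules.common.validators import int_coercable, compose

four_digit_number = compose(int_coercable, lambda string: len(string) == 4)
print(four_digit_number('2016'))  # True
print(four_digit_number('20a6'))  # False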
libs/common/guessit/rules/common/words.py (new file, 34 lines)
@@ -0,0 +1,34 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Words utils
"""
from collections import namedtuple

from . import seps

_Word = namedtuple('_Word', ['span', 'value'])


def iter_words(string):
    """
    Iterate over all words in a string
    :param string:
    :type string:
    :return:
    :rtype: iterable[_Word]
    """
    i = 0
    last_sep_index = -1
    inside_word = False
    for char in string:
        if ord(char) < 128 and char in seps:  # Make sure we don't exclude unicode characters.
            if inside_word:
                yield _Word(span=(last_sep_index + 1, i), value=string[last_sep_index + 1:i])
            inside_word = False
            last_sep_index = i
        else:
            inside_word = True
        i += 1
    if inside_word:
        yield _Word(span=(last_sep_index + 1, i), value=string[last_sep_index + 1:i])
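Behavior check, not part of the commit: words are split on the seps characters, and each comes back with its span:

from guessit.rules.common.words import iter_words

print(list(iter_words('The.Big-Bang Theory')))
# [_Word(span=(0, 3), value='The'), _Word(span=(4, 7), value='Big'),
#  _Word(span=(8, 12), value='Bang'), _Word(span=(13, 19), value='Theory')]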
libs/common/guessit/rules/markers/__init__.py (new file, 5 lines)
@@ -0,0 +1,5 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Markers
"""
libs/common/guessit/rules/markers/groups.py (new file, 52 lines)
@@ -0,0 +1,52 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Groups markers (...), [...] and {...}
"""
from rebulk import Rebulk


def groups(config):
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk.defaults(name="group", marker=True)

    starting = config['starting']
    ending = config['ending']

    def mark_groups(input_string):
        """
        Functional pattern to mark groups (...), [...] and {...}.

        :param input_string:
        :return:
        """
        openings = ([], [], [])
        i = 0

        ret = []
        for char in input_string:
            start_type = starting.find(char)
            if start_type > -1:
                openings[start_type].append(i)

            i += 1

            end_type = ending.find(char)
            if end_type > -1:
                try:
                    start_index = openings[end_type].pop()
                    ret.append((start_index, i))
                except IndexError:
                    pass
        return ret

    rebulk.functional(mark_groups)
    return rebulk
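Hedged sketch, not part of the commit: exercising the marker directly with a guessit-like config. The 'starting'/'ending' values are assumptions mirroring the docstring above, and rebulk's Rebulk.matches() runs the functional pattern:

from guessit.rules.markers.groups import groups

rebulk = groups({'starting': '([{', 'ending': ')]}'})  # assumed config values
matches = rebulk.matches('Title.[720p].{x264}-GRP')
print([(marker.span, marker.raw) for marker in matches.markers])
# spans cover each bracketed group, brackets included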
libs/common/guessit/rules/markers/path.py (new file, 47 lines)
@@ -0,0 +1,47 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Path markers
"""
from rebulk import Rebulk

from rebulk.utils import find_all


def path(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk.defaults(name="path", marker=True)

    def mark_path(input_string, context):
        """
        Functional pattern to mark path elements.

        :param input_string:
        :param context:
        :return:
        """
        ret = []
        if context.get('name_only', False):
            ret.append((0, len(input_string)))
        else:
            indices = list(find_all(input_string, '/'))
            indices += list(find_all(input_string, '\\'))
            indices += [-1, len(input_string)]

            indices.sort()

            for i in range(0, len(indices) - 1):
                ret.append((indices[i] + 1, indices[i + 1]))

        return ret

    rebulk.functional(mark_path)
    return rebulk
libs/common/guessit/rules/processors.py (new file, 257 lines)
@@ -0,0 +1,257 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Processors
"""
from collections import defaultdict
import copy

import six

from rebulk import Rebulk, Rule, CustomRule, POST_PROCESS, PRE_PROCESS, AppendMatch, RemoveMatch

from .common import seps_no_groups
from .common.formatters import cleanup
from .common.comparators import marker_sorted
from .common.date import valid_year
from .common.words import iter_words


class EnlargeGroupMatches(CustomRule):
    """
    Enlarge matches that start and/or end a group so that their spans include the brackets.
    """
    priority = PRE_PROCESS

    def when(self, matches, context):
        starting = []
        ending = []

        for group in matches.markers.named('group'):
            for match in matches.starting(group.start + 1):
                starting.append(match)

            for match in matches.ending(group.end - 1):
                ending.append(match)

        return starting, ending

    def then(self, matches, when_response, context):
        starting, ending = when_response
        for match in starting:
            matches.remove(match)
            match.start -= 1
            match.raw_start += 1
            matches.append(match)

        for match in ending:
            matches.remove(match)
            match.end += 1
            match.raw_end -= 1
            matches.append(match)


class EquivalentHoles(Rule):
    """
    Creates equivalent matches for holes that have the same values as existing matches (case insensitive).
    """
    priority = POST_PROCESS
    consequence = AppendMatch

    def when(self, matches, context):
        new_matches = []

        for filepath in marker_sorted(matches.markers.named('path'), matches):
            holes = matches.holes(start=filepath.start, end=filepath.end, formatter=cleanup)
            for name in matches.names:
                for hole in list(holes):
                    for current_match in matches.named(name):
                        if isinstance(current_match.value, six.string_types) and \
                                hole.value.lower() == current_match.value.lower():
                            if 'equivalent-ignore' in current_match.tags:
                                continue
                            new_value = _preferred_string(hole.value, current_match.value)
                            if hole.value != new_value:
                                hole.value = new_value
                            if current_match.value != new_value:
                                current_match.value = new_value
                            hole.name = name
                            hole.tags = ['equivalent']
                            new_matches.append(hole)
                            if hole in holes:
                                holes.remove(hole)

        return new_matches


class RemoveAmbiguous(Rule):
    """
    If multiple matches are found with the same name and different values, keep the one in the most valuable filepart.
    Also keep other matches with the same name and value as the kept ones.
    """

    priority = POST_PROCESS
    consequence = RemoveMatch

    def __init__(self, sort_function=marker_sorted, predicate=None):
        super(RemoveAmbiguous, self).__init__()
        self.sort_function = sort_function
        self.predicate = predicate

    def when(self, matches, context):
        fileparts = self.sort_function(matches.markers.named('path'), matches)

        previous_fileparts_names = set()
        values = defaultdict(list)

        to_remove = []
        for filepart in fileparts:
            filepart_matches = matches.range(filepart.start, filepart.end, predicate=self.predicate)

            filepart_names = set()
            for match in filepart_matches:
                filepart_names.add(match.name)
                if match.name in previous_fileparts_names:
                    if match.value not in values[match.name]:
                        to_remove.append(match)
                else:
                    if match.value not in values[match.name]:
                        values[match.name].append(match.value)

            previous_fileparts_names.update(filepart_names)

        return to_remove


class RemoveLessSpecificSeasonEpisode(RemoveAmbiguous):
    """
    If multiple season/episode matches are found with different values,
    keep the one tagged as 'SxxExx' or in the rightmost filepart.
    """
    def __init__(self, name):
        super(RemoveLessSpecificSeasonEpisode, self).__init__(
            sort_function=(lambda markers, matches:
                           marker_sorted(list(reversed(markers)), matches,
                                         lambda match: match.name == name and 'SxxExx' in match.tags)),
            predicate=lambda match: match.name == name)


def _preferred_string(value1, value2):  # pylint:disable=too-many-return-statements
    """
    Retrieves the preferred title from both values.
    :param value1:
    :type value1: str
    :param value2:
    :type value2: str
    :return: The preferred title
    :rtype: str
    """
    if value1 == value2:
        return value1
    if value1.istitle() and not value2.istitle():
        return value1
    if not value1.isupper() and value2.isupper():
        return value1
    if not value1.isupper() and value1[0].isupper() and not value2[0].isupper():
        return value1
    if _count_title_words(value1) > _count_title_words(value2):
        return value1
    return value2


def _count_title_words(value):
    """
    Count how many words in value are title-cased.
    :param value:
    :type value:
    :return:
    :rtype:
    """
    ret = 0
    for word in iter_words(value):
        if word.value.istitle():
            ret += 1
    return ret


class SeasonYear(Rule):
    """
    If a season is a valid year and no year was found, create a year match.
    """
    priority = POST_PROCESS
    consequence = AppendMatch

    def when(self, matches, context):
        ret = []
        if not matches.named('year'):
            for season in matches.named('season'):
                if valid_year(season.value):
                    year = copy.copy(season)
                    year.name = 'year'
                    ret.append(year)
        return ret


class YearSeason(Rule):
    """
    If a year and an episode are found but no season, create a season match.
    """
    priority = POST_PROCESS
    consequence = AppendMatch

    def when(self, matches, context):
        ret = []
        if not matches.named('season') and matches.named('episode'):
            for year in matches.named('year'):
                season = copy.copy(year)
                season.name = 'season'
                ret.append(season)
        return ret


class Processors(CustomRule):
    """
    Empty rule for ordering post_processing properly.
    """
    priority = POST_PROCESS

    def when(self, matches, context):
        pass

    def then(self, matches, when_response, context):  # pragma: no cover
        pass


class StripSeparators(CustomRule):
    """
    Strip separators from matches. Keep separators if they belong to acronyms, like in ".S.H.I.E.L.D."
    """
    priority = POST_PROCESS

    def when(self, matches, context):
        return matches

    def then(self, matches, when_response, context):  # pragma: no cover
        for match in matches:
            for _ in range(0, len(match.span)):
                if match.raw[0] in seps_no_groups and (len(match.raw) < 3 or match.raw[2] not in seps_no_groups):
                    match.raw_start += 1

            for _ in reversed(range(0, len(match.span))):
                if match.raw[-1] in seps_no_groups and (len(match.raw) < 3 or match.raw[-3] not in seps_no_groups):
                    match.raw_end -= 1


def processors(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    return Rebulk().rules(EnlargeGroupMatches, EquivalentHoles,
                          RemoveLessSpecificSeasonEpisode('season'),
                          RemoveLessSpecificSeasonEpisode('episode'),
                          RemoveAmbiguous, SeasonYear, YearSeason, Processors, StripSeparators)
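Behavior check, not part of the commit: _preferred_string is a private helper, imported here only to show the title-preference ordering:

from guessit.rules.processors import _preferred_string

print(_preferred_string('the simpsons', 'The Simpsons'))  # 'The Simpsons' (more title-cased words)
print(_preferred_string('The Simpsons', 'THE SIMPSONS'))  # 'The Simpsons' (title case beats all-caps)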
libs/common/guessit/rules/properties/__init__.py (new file, 5 lines)
@@ -0,0 +1,5 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Properties
"""
libs/common/guessit/rules/properties/audio_codec.py (new file, 230 lines)
@@ -0,0 +1,230 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
audio_codec, audio_profile and audio_channels properties
"""
from rebulk.remodule import re

from rebulk import Rebulk, Rule, RemoveMatch

from ..common import dash
from ..common.pattern import is_disabled
from ..common.validators import seps_before, seps_after

audio_properties = ['audio_codec', 'audio_profile', 'audio_channels']


def audio_codec(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)

    def audio_codec_priority(match1, match2):
        """
        Gives priority to audio_codec
        :param match1:
        :type match1:
        :param match2:
        :type match2:
        :return:
        :rtype:
        """
        if match1.name == 'audio_codec' and match2.name in ['audio_profile', 'audio_channels']:
            return match2
        if match1.name in ['audio_profile', 'audio_channels'] and match2.name == 'audio_codec':
            return match1
        return '__default__'

    rebulk.defaults(name='audio_codec',
                    conflict_solver=audio_codec_priority,
                    disabled=lambda context: is_disabled(context, 'audio_codec'))

    rebulk.regex("MP3", "LAME", r"LAME(?:\d)+-?(?:\d)+", value="MP3")
    rebulk.string("MP2", value="MP2")
    rebulk.regex('Dolby', 'DolbyDigital', 'Dolby-Digital', 'DD', 'AC3D?', value='Dolby Digital')
    rebulk.regex('Dolby-?Atmos', 'Atmos', value='Dolby Atmos')
    rebulk.string("AAC", value="AAC")
    rebulk.string('EAC3', 'DDP', 'DD+', value='Dolby Digital Plus')
    rebulk.string("Flac", value="FLAC")
    rebulk.string("DTS", value="DTS")
    rebulk.regex('DTS-?HD', 'DTS(?=-?MA)', value='DTS-HD',
                 conflict_solver=lambda match, other: other if other.name == 'audio_codec' else '__default__')
    rebulk.regex('True-?HD', value='Dolby TrueHD')
    rebulk.string('Opus', value='Opus')
    rebulk.string('Vorbis', value='Vorbis')
    rebulk.string('PCM', value='PCM')
    rebulk.string('LPCM', value='LPCM')

    rebulk.defaults(name='audio_profile', disabled=lambda context: is_disabled(context, 'audio_profile'))
    rebulk.string('MA', value='Master Audio', tags=['audio_profile.rule', 'DTS-HD'])
    rebulk.string('HR', 'HRA', value='High Resolution Audio', tags=['audio_profile.rule', 'DTS-HD'])
    rebulk.string('ES', value='Extended Surround', tags=['audio_profile.rule', 'DTS'])
    rebulk.string('HE', value='High Efficiency', tags=['audio_profile.rule', 'AAC'])
    rebulk.string('LC', value='Low Complexity', tags=['audio_profile.rule', 'AAC'])
    rebulk.string('HQ', value='High Quality', tags=['audio_profile.rule', 'Dolby Digital'])
    rebulk.string('EX', value='EX', tags=['audio_profile.rule', 'Dolby Digital'])

    rebulk.defaults(name="audio_channels", disabled=lambda context: is_disabled(context, 'audio_channels'))
    rebulk.regex(r'(7[\W_][01](?:ch)?)(?=[^\d]|$)', value='7.1', children=True)
    rebulk.regex(r'(5[\W_][01](?:ch)?)(?=[^\d]|$)', value='5.1', children=True)
    rebulk.regex(r'(2[\W_]0(?:ch)?)(?=[^\d]|$)', value='2.0', children=True)
    rebulk.regex('7[01]', value='7.1', validator=seps_after, tags='weak-audio_channels')
    rebulk.regex('5[01]', value='5.1', validator=seps_after, tags='weak-audio_channels')
    rebulk.string('20', value='2.0', validator=seps_after, tags='weak-audio_channels')
    rebulk.string('7ch', '8ch', value='7.1')
    rebulk.string('5ch', '6ch', value='5.1')
    rebulk.string('2ch', 'stereo', value='2.0')
    rebulk.string('1ch', 'mono', value='1.0')

    rebulk.rules(DtsHDRule, DtsRule, AacRule, DolbyDigitalRule, AudioValidatorRule, HqConflictRule,
                 AudioChannelsValidatorRule)

    return rebulk


class AudioValidatorRule(Rule):
    """
    Remove audio properties that are not surrounded by separators and not next to each other.
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        ret = []

        audio_list = matches.range(predicate=lambda match: match.name in audio_properties)
        for audio in audio_list:
            if not seps_before(audio):
                valid_before = matches.range(audio.start - 1, audio.start,
                                             lambda match: match.name in audio_properties)
                if not valid_before:
                    ret.append(audio)
                    continue
            if not seps_after(audio):
                valid_after = matches.range(audio.end, audio.end + 1,
                                            lambda match: match.name in audio_properties)
                if not valid_after:
                    ret.append(audio)
                    continue

        return ret


class AudioProfileRule(Rule):
    """
    Abstract rule to validate audio profiles
    """
    priority = 64
    dependency = AudioValidatorRule
    consequence = RemoveMatch

    def __init__(self, codec):
        super(AudioProfileRule, self).__init__()
        self.codec = codec

    def enabled(self, context):
        return not is_disabled(context, 'audio_profile')

    def when(self, matches, context):
        profile_list = matches.named('audio_profile',
                                     lambda match: 'audio_profile.rule' in match.tags and
                                     self.codec in match.tags)
        ret = []
        for profile in profile_list:
            codec = matches.at_span(profile.span,
                                    lambda match: match.name == 'audio_codec' and
                                    match.value == self.codec, 0)
            if not codec:
                codec = matches.previous(profile,
                                         lambda match: match.name == 'audio_codec' and
                                         match.value == self.codec)
            if not codec:
                codec = matches.next(profile,
                                     lambda match: match.name == 'audio_codec' and
                                     match.value == self.codec)
            if not codec:
                ret.append(profile)
            if codec:
                ret.extend(matches.conflicting(profile))
        return ret


class DtsHDRule(AudioProfileRule):
    """
    Rule to validate DTS-HD profile
    """

    def __init__(self):
        super(DtsHDRule, self).__init__('DTS-HD')


class DtsRule(AudioProfileRule):
    """
    Rule to validate DTS profile
    """

    def __init__(self):
        super(DtsRule, self).__init__('DTS')


class AacRule(AudioProfileRule):
    """
    Rule to validate AAC profile
    """

    def __init__(self):
        super(AacRule, self).__init__('AAC')


class DolbyDigitalRule(AudioProfileRule):
    """
    Rule to validate Dolby Digital profile
    """

    def __init__(self):
        super(DolbyDigitalRule, self).__init__('Dolby Digital')


class HqConflictRule(Rule):
    """
    Solve conflicts between 'HQ' from the other property and from audio_profile.
    """

    dependency = [DtsHDRule, DtsRule, AacRule, DolbyDigitalRule]
    consequence = RemoveMatch

    def enabled(self, context):
        return not is_disabled(context, 'audio_profile')

    def when(self, matches, context):
        hq_audio = matches.named('audio_profile', lambda m: m.value == 'High Quality')
        hq_audio_spans = [match.span for match in hq_audio]
        return matches.named('other', lambda m: m.span in hq_audio_spans)


class AudioChannelsValidatorRule(Rule):
    """
    Remove audio_channels if there is no audio codec as the previous match.
    """
    priority = 128
    consequence = RemoveMatch

    def enabled(self, context):
        return not is_disabled(context, 'audio_channels')

    def when(self, matches, context):
        ret = []

        for audio_channel in matches.tagged('weak-audio_channels'):
            valid_before = matches.range(audio_channel.start - 1, audio_channel.start,
                                         lambda match: match.name == 'audio_codec')
            if not valid_before:
                ret.append(audio_channel)

        return ret
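Illustrative end-to-end call, not part of the commit (assumes the guessit package with these rules installed):

from guessit import guessit

result = guessit('Movie.2016.1080p.BluRay.DTS-HD.MA.7.1.x264-GRP')
# expected to include: 'audio_codec': 'DTS-HD', 'audio_profile': 'Master Audio',
# 'audio_channels': '7.1'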
libs/common/guessit/rules/properties/bit_rate.py (new file, 72 lines)
@@ -0,0 +1,72 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
video_bit_rate and audio_bit_rate properties
"""
import re

from rebulk import Rebulk
from rebulk.rules import Rule, RemoveMatch, RenameMatch

from ..common import dash, seps
from ..common.pattern import is_disabled
from ..common.quantity import BitRate
from ..common.validators import seps_surround


def bit_rate(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: (is_disabled(context, 'audio_bit_rate')
                                              and is_disabled(context, 'video_bit_rate')))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='audio_bit_rate', validator=seps_surround)
    rebulk.regex(r'\d+-?[kmg]b(ps|its?)', r'\d+\.\d+-?[kmg]b(ps|its?)',
                 conflict_solver=(
                     lambda match, other: match
                     if other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags
                     else other
                 ),
                 formatter=BitRate.fromstring, tags=['release-group-prefix'])

    rebulk.rules(BitRateTypeRule)

    return rebulk


class BitRateTypeRule(Rule):
    """
    Convert audio bit rate guess into video bit rate.
    """
    consequence = [RenameMatch('video_bit_rate'), RemoveMatch]

    def when(self, matches, context):
        to_rename = []
        to_remove = []

        if is_disabled(context, 'audio_bit_rate'):
            to_remove.extend(matches.named('audio_bit_rate'))
        else:
            video_bit_rate_disabled = is_disabled(context, 'video_bit_rate')
            for match in matches.named('audio_bit_rate'):
                previous = matches.previous(match, index=0,
                                            predicate=lambda m: m.name in ('source', 'screen_size', 'video_codec'))
                if previous and not matches.holes(previous.end, match.start, predicate=lambda m: m.value.strip(seps)):
                    after = matches.next(match, index=0, predicate=lambda m: m.name == 'audio_codec')
                    if after and not matches.holes(match.end, after.start, predicate=lambda m: m.value.strip(seps)):
                        bitrate = match.value
                        if bitrate.units == 'Kbps' or (bitrate.units == 'Mbps' and bitrate.magnitude < 10):
                            continue

                    if video_bit_rate_disabled:
                        to_remove.append(match)
                    else:
                        to_rename.append(match)

        return to_rename, to_remove
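Hedged illustration of BitRateTypeRule, not part of the commit: a rate that sits between a video property and an audio codec, and is at least 10 Mbps, should be renamed to video_bit_rate:

from guessit import guessit

result = guessit('Movie.2016.720p.10Mbps.DTS.mkv')
# expected to include: 'video_bit_rate': <BitRate [10Mbps]>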
libs/common/guessit/rules/properties/bonus.py (new file, 55 lines)
@@ -0,0 +1,55 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
bonus property
"""
from rebulk.remodule import re

from rebulk import Rebulk, AppendMatch, Rule

from .title import TitleFromPosition
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import seps_surround


def bonus(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'bonus'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)

    rebulk.regex(r'x(\d+)', name='bonus', private_parent=True, children=True, formatter=int,
                 validator={'__parent__': lambda match: seps_surround},
                 conflict_solver=lambda match, conflicting: match
                 if conflicting.name in ('video_codec', 'episode') and 'weak-episode' not in conflicting.tags
                 else '__default__')

    rebulk.rules(BonusTitleRule)

    return rebulk


class BonusTitleRule(Rule):
    """
    Find bonus title after bonus.
    """
    dependency = TitleFromPosition
    consequence = AppendMatch

    properties = {'bonus_title': [None]}

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        bonus_number = matches.named('bonus', lambda match: not match.private, index=0)
        if bonus_number:
            filepath = matches.markers.at_match(bonus_number, lambda marker: marker.name == 'path', 0)
            hole = matches.holes(bonus_number.end, filepath.end + 1, formatter=cleanup, index=0)
            if hole and hole.value:
                hole.name = 'bonus_title'
                return hole
libs/common/guessit/rules/properties/cds.py (new file, 41 lines)
@@ -0,0 +1,41 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
cd and cd_count properties
"""
from rebulk.remodule import re

from rebulk import Rebulk

from ..common import dash
from ..common.pattern import is_disabled


def cds(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'cd'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])

    rebulk.regex(r'cd-?(?P<cd>\d+)(?:-?of-?(?P<cd_count>\d+))?',
                 validator={'cd': lambda match: 0 < match.value < 100,
                            'cd_count': lambda match: 0 < match.value < 100},
                 formatter={'cd': int, 'cd_count': int},
                 children=True,
                 private_parent=True,
                 properties={'cd': [None], 'cd_count': [None]})
    rebulk.regex(r'(?P<cd_count>\d+)-?cds?',
                 validator={'cd': lambda match: 0 < match.value < 100,
                            'cd_count': lambda match: 0 < match.value < 100},
                 formatter={'cd_count': int},
                 children=True,
                 private_parent=True,
                 properties={'cd': [None], 'cd_count': [None]})

    return rebulk
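Hedged illustration, not part of the commit: with the dash abbreviation, the separators in 'CD2.of.3' satisfy the '-?' parts of the first pattern:

from guessit import guessit

result = guessit('Some.Album.CD2.of.3.FLAC')
# expected to include: 'cd': 2, 'cd_count': 3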
60
libs/common/guessit/rules/properties/container.py
Normal file
@@ -0,0 +1,60 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
container property
"""
from rebulk.remodule import re

from rebulk import Rebulk

from ..common import seps
from ..common.pattern import is_disabled
from ..common.validators import seps_surround
from ...reutils import build_or_pattern


def container(config):
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'container'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(name='container',
                    formatter=lambda value: value.strip(seps),
                    tags=['extension'],
                    conflict_solver=lambda match, other: other
                    if other.name in ('source', 'video_codec') or
                    other.name == 'container' and 'extension' not in other.tags
                    else '__default__')

    subtitles = config['subtitles']
    info = config['info']
    videos = config['videos']
    torrent = config['torrent']
    nzb = config['nzb']

    rebulk.regex(r'\.'+build_or_pattern(subtitles)+'$', exts=subtitles, tags=['extension', 'subtitle'])
    rebulk.regex(r'\.'+build_or_pattern(info)+'$', exts=info, tags=['extension', 'info'])
    rebulk.regex(r'\.'+build_or_pattern(videos)+'$', exts=videos, tags=['extension', 'video'])
    rebulk.regex(r'\.'+build_or_pattern(torrent)+'$', exts=torrent, tags=['extension', 'torrent'])
    rebulk.regex(r'\.'+build_or_pattern(nzb)+'$', exts=nzb, tags=['extension', 'nzb'])

    rebulk.defaults(name='container',
                    validator=seps_surround,
                    formatter=lambda s: s.lower(),
                    conflict_solver=lambda match, other: match
                    if other.name in ('source',
                                      'video_codec') or other.name == 'container' and 'extension' in other.tags
                    else '__default__')

    rebulk.string(*[sub for sub in subtitles if sub not in ('sub', 'ass')], tags=['subtitle'])
    rebulk.string(*videos, tags=['video'])
    rebulk.string(*torrent, tags=['torrent'])
    rebulk.string(*nzb, tags=['nzb'])

    return rebulk
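
Aside: the two defaults blocks above distinguish real file extensions (anchored with '$' and tagged 'extension') from bare tokens inside the name. A hedged sketch of the expected behaviour:

from guessit import guessit

# The trailing '.mkv' should match the extension regex and win conflicts against
# source/video_codec per the first conflict_solver.
print(guessit('Show.S01E01.mkv'))  # expected to include {'container': 'mkv'}
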
114
libs/common/guessit/rules/properties/country.py
Normal file
@@ -0,0 +1,114 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
country property
"""
# pylint: disable=no-member
import babelfish

from rebulk import Rebulk
from ..common.pattern import is_disabled
from ..common.words import iter_words


def country(config, common_words):
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :param common_words: common words
    :type common_words: set
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'country'))
    rebulk = rebulk.defaults(name='country')

    def find_countries(string, context=None):
        """
        Find countries in given string.
        """
        allowed_countries = context.get('allowed_countries') if context else None
        return CountryFinder(allowed_countries, common_words).find(string)

    rebulk.functional(find_countries,
                      # Prefer language and any other property over country if not US or GB.
                      conflict_solver=lambda match, other: match
                      if other.name != 'language' or match.value not in (babelfish.Country('US'),
                                                                         babelfish.Country('GB'))
                      else other,
                      properties={'country': [None]},
                      disabled=lambda context: not context.get('allowed_countries'))

    babelfish.country_converters['guessit'] = GuessitCountryConverter(config['synonyms'])

    return rebulk


class GuessitCountryConverter(babelfish.CountryReverseConverter):  # pylint: disable=missing-docstring
    def __init__(self, synonyms):
        self.guessit_exceptions = {}

        for alpha2, synlist in synonyms.items():
            for syn in synlist:
                self.guessit_exceptions[syn.lower()] = alpha2

    @property
    def codes(self):  # pylint: disable=missing-docstring
        return (babelfish.country_converters['name'].codes |
                frozenset(babelfish.COUNTRIES.values()) |
                frozenset(self.guessit_exceptions.keys()))

    def convert(self, alpha2):
        if alpha2 == 'GB':
            return 'UK'
        return str(babelfish.Country(alpha2))

    def reverse(self, name):  # pylint:disable=arguments-differ
        # exceptions come first, as they need to override a potential match
        # with any of the other guessers
        try:
            return self.guessit_exceptions[name.lower()]
        except KeyError:
            pass

        try:
            return babelfish.Country(name.upper()).alpha2
        except ValueError:
            pass

        for conv in [babelfish.Country.fromname]:
            try:
                return conv(name).alpha2
            except babelfish.CountryReverseError:
                pass

        raise babelfish.CountryReverseError(name)


class CountryFinder(object):
    """Helper class to search and return country matches."""

    def __init__(self, allowed_countries, common_words):
        self.allowed_countries = {l.lower() for l in allowed_countries or []}
        self.common_words = common_words

    def find(self, string):
        """Return all matches for country."""
        for word_match in iter_words(string.strip().lower()):
            word = word_match.value
            if word.lower() in self.common_words:
                continue

            try:
                country_object = babelfish.Country.fromguessit(word)
                if (country_object.name.lower() in self.allowed_countries or
                        country_object.alpha2.lower() in self.allowed_countries):
                    yield self._to_rebulk_match(word_match, country_object)
            except babelfish.Error:
                continue

    @classmethod
    def _to_rebulk_match(cls, word, value):
        return word.span[0], word.span[1], {'value': value}
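
Aside: country matching is opt-in via the 'allowed_countries' context option (see the disabled= lambda above). A hedged sketch:

from guessit import guessit

# 'US' is recognized when allowed; per GuessitCountryConverter.convert, 'GB' would render as 'UK'.
print(guessit('Shameless.US.S01E01.720p.mkv', {'allowed_countries': ['us']}))
# expected to include a country value for US
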
90
libs/common/guessit/rules/properties/crc.py
Normal file
@@ -0,0 +1,90 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
crc and uuid properties
"""
from rebulk.remodule import re

from rebulk import Rebulk
from ..common.pattern import is_disabled
from ..common.validators import seps_surround


def crc(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'crc32'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(validator=seps_surround)

    rebulk.regex('(?:[a-fA-F]|[0-9]){8}', name='crc32',
                 conflict_solver=lambda match, other: other
                 if other.name in ['episode', 'season']
                 else '__default__')

    rebulk.functional(guess_idnumber, name='uuid',
                      conflict_solver=lambda match, other: match
                      if other.name in ['episode', 'season']
                      else '__default__')
    return rebulk


_DIGIT = 0
_LETTER = 1
_OTHER = 2

_idnum = re.compile(r'(?P<uuid>[a-zA-Z0-9-]{20,})')  # 1.0, (0, 0))


def guess_idnumber(string):
    """
    Guess id number function
    :param string:
    :type string:
    :return:
    :rtype:
    """
    # pylint:disable=invalid-name
    ret = []

    matches = list(_idnum.finditer(string))
    for match in matches:
        result = match.groupdict()
        switch_count = 0
        switch_letter_count = 0
        letter_count = 0
        last_letter = None

        last = _LETTER
        for c in result['uuid']:
            if c in '0123456789':
                ci = _DIGIT
            elif c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
                ci = _LETTER
                if c != last_letter:
                    switch_letter_count += 1
                last_letter = c
                letter_count += 1
            else:
                ci = _OTHER

            if ci != last:
                switch_count += 1

            last = ci

        # only return the result as probable if we alternate often between
        # char type (more likely for hash values than for common words)
        switch_ratio = float(switch_count) / len(result['uuid'])
        letters_ratio = (float(switch_letter_count) / letter_count) if letter_count > 0 else 1

        if switch_ratio > 0.4 and letters_ratio > 0.4:
            ret.append(match.span())

    return ret
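
Aside: the two ratios above are what separate hash-like tokens from ordinary long words. A quick check of guess_idnumber in isolation:

# 'A1B2C3D4E5F6A7B8C9D0' alternates digit/letter on nearly every character, so both
# ratios clear 0.4 and its span is returned; a 20-letter word never switches char
# type (switch_ratio == 0) and is rejected.
print(guess_idnumber('A1B2C3D4E5F6A7B8C9D0'))  # expected: [(0, 20)]
print(guess_idnumber('abcdefghijklmnopqrst'))  # expected: []
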
84
libs/common/guessit/rules/properties/date.py
Normal file
@@ -0,0 +1,84 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
date and year properties
"""
from rebulk import Rebulk, RemoveMatch, Rule

from ..common.date import search_date, valid_year
from ..common.pattern import is_disabled
from ..common.validators import seps_surround


def date(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk().defaults(validator=seps_surround)

    rebulk.regex(r"\d{4}", name="year", formatter=int,
                 disabled=lambda context: is_disabled(context, 'year'),
                 conflict_solver=lambda match, other: other
                 if other.name in ('episode', 'season') and len(other.raw) < len(match.raw)
                 else '__default__',
                 validator=lambda match: seps_surround(match) and valid_year(match.value))

    def date_functional(string, context):  # pylint:disable=inconsistent-return-statements
        """
        Search for date in the string and retrieves match

        :param string:
        :return:
        """

        ret = search_date(string, context.get('date_year_first'), context.get('date_day_first'))
        if ret:
            return ret[0], ret[1], {'value': ret[2]}

    rebulk.functional(date_functional, name="date", properties={'date': [None]},
                      disabled=lambda context: is_disabled(context, 'date'),
                      conflict_solver=lambda match, other: other
                      if other.name in ('episode', 'season', 'crc32')
                      else '__default__')

    rebulk.rules(KeepMarkedYearInFilepart)

    return rebulk


class KeepMarkedYearInFilepart(Rule):
    """
    Keep first years marked with [](){} in filepart, or if no year is marked, ensure it won't override titles.
    """
    priority = 64
    consequence = RemoveMatch

    def enabled(self, context):
        return not is_disabled(context, 'year')

    def when(self, matches, context):
        ret = []
        if len(matches.named('year')) > 1:
            for filepart in matches.markers.named('path'):
                years = matches.range(filepart.start, filepart.end, lambda match: match.name == 'year')
                if len(years) > 1:
                    group_years = []
                    ungroup_years = []
                    for year in years:
                        if matches.markers.at_match(year, lambda marker: marker.name == 'group'):
                            group_years.append(year)
                        else:
                            ungroup_years.append(year)
                    if group_years and ungroup_years:
                        ret.extend(ungroup_years)
                        ret.extend(group_years[1:])  # Keep the first year in marker.
                    elif not group_years:
                        ret.append(ungroup_years[0])  # Keep first year for title.
                        if len(ungroup_years) > 2:
                            ret.extend(ungroup_years[2:])
        return ret
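
Aside: ambiguous numeric dates are steered by the two context options read in date_functional. A hedged sketch:

from guessit import guessit

# With date_day_first, '01-02-2017' should parse as 1 February 2017.
print(guessit('Show.01-02-2017.mkv', {'date_day_first': True}))
# expected to include {'date': datetime.date(2017, 2, 1)}
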
52
libs/common/guessit/rules/properties/edition.py
Normal file
@@ -0,0 +1,52 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
edition property
"""
from rebulk.remodule import re

from rebulk import Rebulk
from ..common import dash
from ..common.pattern import is_disabled
from ..common.validators import seps_surround


def edition(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'edition'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
    rebulk.defaults(name='edition', validator=seps_surround)

    rebulk.regex('collector', "collector'?s?-edition", 'edition-collector', value='Collector')
    rebulk.regex('special-edition', 'edition-special', value='Special',
                 conflict_solver=lambda match, other: other
                 if other.name == 'episode_details' and other.value == 'Special'
                 else '__default__')
    rebulk.string('se', value='Special', tags='has-neighbor')
    rebulk.string('ddc', value="Director's Definitive Cut")
    rebulk.regex('criterion-edition', 'edition-criterion', 'CC', value='Criterion')
    rebulk.regex('deluxe', 'deluxe-edition', 'edition-deluxe', value='Deluxe')
    rebulk.regex('limited', 'limited-edition', value='Limited', tags=['has-neighbor', 'release-group-prefix'])
    rebulk.regex(r'theatrical-cut', r'theatrical-edition', r'theatrical', value='Theatrical')
    rebulk.regex(r"director'?s?-cut", r"director'?s?-cut-edition", r"edition-director'?s?-cut", 'DC',
                 value="Director's Cut")
    rebulk.regex('extended', 'extended-?cut', 'extended-?version',
                 value='Extended', tags=['has-neighbor', 'release-group-prefix'])
    rebulk.regex('alternat(e|ive)(?:-?Cut)?', value='Alternative Cut', tags=['has-neighbor', 'release-group-prefix'])
    for value in ('Remastered', 'Uncensored', 'Uncut', 'Unrated'):
        rebulk.string(value, value=value, tags=['has-neighbor', 'release-group-prefix'])
    rebulk.string('Festival', value='Festival', tags=['has-neighbor-before', 'has-neighbor-after'])
    rebulk.regex('imax', 'imax-edition', value='IMAX')
    rebulk.regex('fan-edit(?:ion)?', 'fan-collection', value='Fan')
    rebulk.regex('ultimate-edition', value='Ultimate')
    rebulk.regex("ultimate-collector'?s?-edition", value=['Ultimate', 'Collector'])
    rebulk.regex('ultimate-fan-edit(?:ion)?', 'ultimate-fan-collection', value=['Ultimate', 'Fan'])

    return rebulk
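
Aside: a hedged sketch of one of the declarations above; 'extended' carries the 'has-neighbor' tag, which is satisfied here by the surrounding year/resolution matches:

from guessit import guessit

print(guessit('Movie.2010.Extended.1080p.mkv'))
# expected to include {'edition': 'Extended'}
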
300
libs/common/guessit/rules/properties/episode_title.py
Normal file
@@ -0,0 +1,300 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Episode title
"""
from collections import defaultdict

from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, RenameMatch, POST_PROCESS

from ..common import seps, title_seps
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..properties.title import TitleFromPosition, TitleBaseRule
from ..properties.type import TypeProcessor


def episode_title(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    previous_names = ('episode', 'episode_count',
                      'season', 'season_count', 'date', 'title', 'year')

    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'episode_title'))
    rebulk = rebulk.rules(RemoveConflictsWithEpisodeTitle(previous_names),
                          EpisodeTitleFromPosition(previous_names),
                          AlternativeTitleReplace(previous_names),
                          TitleToEpisodeTitle,
                          Filepart3EpisodeTitle,
                          Filepart2EpisodeTitle,
                          RenameEpisodeTitleWhenMovieType)
    return rebulk


class RemoveConflictsWithEpisodeTitle(Rule):
    """
    Remove conflicting matches that might lead to wrong episode_title parsing.
    """

    priority = 64
    consequence = RemoveMatch

    def __init__(self, previous_names):
        super(RemoveConflictsWithEpisodeTitle, self).__init__()
        self.previous_names = previous_names
        self.next_names = ('streaming_service', 'screen_size', 'source',
                           'video_codec', 'audio_codec', 'other', 'container')
        self.affected_if_holes_after = ('part', )
        self.affected_names = ('part', 'year')

    def when(self, matches, context):
        to_remove = []
        for filepart in matches.markers.named('path'):
            for match in matches.range(filepart.start, filepart.end,
                                       predicate=lambda m: m.name in self.affected_names):
                before = matches.range(filepart.start, match.start, predicate=lambda m: not m.private, index=-1)
                if not before or before.name not in self.previous_names:
                    continue

                after = matches.range(match.end, filepart.end, predicate=lambda m: not m.private, index=0)
                if not after or after.name not in self.next_names:
                    continue

                group = matches.markers.at_match(match, predicate=lambda m: m.name == 'group', index=0)

                def has_value_in_same_group(current_match, current_group=group):
                    """Return true if current match has value and belongs to the current group."""
                    return current_match.value.strip(seps) and (
                        current_group == matches.markers.at_match(current_match,
                                                                  predicate=lambda mm: mm.name == 'group', index=0)
                    )

                holes_before = matches.holes(before.end, match.start, predicate=has_value_in_same_group)
                holes_after = matches.holes(match.end, after.start, predicate=has_value_in_same_group)

                if not holes_before and not holes_after:
                    continue

                if match.name in self.affected_if_holes_after and not holes_after:
                    continue

                to_remove.append(match)
                if match.parent:
                    to_remove.append(match.parent)

        return to_remove


class TitleToEpisodeTitle(Rule):
    """
    If multiple different titles are found, convert the one following the episode number to episode_title.
    """
    dependency = TitleFromPosition

    def when(self, matches, context):
        titles = matches.named('title')
        title_groups = defaultdict(list)
        for title in titles:
            title_groups[title.value].append(title)

        episode_titles = []
        if len(title_groups) < 2:
            return episode_titles

        for title in titles:
            if matches.previous(title, lambda match: match.name == 'episode'):
                episode_titles.append(title)

        return episode_titles

    def then(self, matches, when_response, context):
        for title in when_response:
            matches.remove(title)
            title.name = 'episode_title'
            matches.append(title)


class EpisodeTitleFromPosition(TitleBaseRule):
    """
    Add episode title match in existing matches
    Must run after TitleFromPosition rule.
    """
    dependency = TitleToEpisodeTitle

    def __init__(self, previous_names):
        super(EpisodeTitleFromPosition, self).__init__('episode_title', ['title'])
        self.previous_names = previous_names

    def hole_filter(self, hole, matches):
        episode = matches.previous(hole,
                                   lambda previous: any(name in previous.names
                                                        for name in self.previous_names),
                                   0)

        crc32 = matches.named('crc32')

        return episode or crc32

    def filepart_filter(self, filepart, matches):
        # Filepart where title was found.
        if matches.range(filepart.start, filepart.end, lambda match: match.name == 'title'):
            return True
        return False

    def should_remove(self, match, matches, filepart, hole, context):
        if match.name == 'episode_details':
            return False
        return super(EpisodeTitleFromPosition, self).should_remove(match, matches, filepart, hole, context)

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        if matches.named('episode_title'):
            return
        return super(EpisodeTitleFromPosition, self).when(matches, context)


class AlternativeTitleReplace(Rule):
    """
    If an alternative_title was found and the title is next to episode, season or date, replace it with episode_title.
    """
    dependency = EpisodeTitleFromPosition
    consequence = RenameMatch

    def __init__(self, previous_names):
        super(AlternativeTitleReplace, self).__init__()
        self.previous_names = previous_names

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        if matches.named('episode_title'):
            return

        alternative_title = matches.range(predicate=lambda match: match.name == 'alternative_title', index=0)
        if alternative_title:
            main_title = matches.chain_before(alternative_title.start, seps=seps,
                                              predicate=lambda match: 'title' in match.tags, index=0)
            if main_title:
                episode = matches.previous(main_title,
                                           lambda previous: any(name in previous.names
                                                                for name in self.previous_names),
                                           0)

                crc32 = matches.named('crc32')

                if episode or crc32:
                    return alternative_title

    def then(self, matches, when_response, context):
        matches.remove(when_response)
        when_response.name = 'episode_title'
        when_response.tags.append('alternative-replaced')
        matches.append(when_response)


class RenameEpisodeTitleWhenMovieType(Rule):
    """
    Rename episode_title to alternative_title when type is movie.
    """
    priority = POST_PROCESS

    dependency = TypeProcessor
    consequence = RenameMatch

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        if matches.named('episode_title', lambda m: 'alternative-replaced' not in m.tags) \
                and not matches.named('type', lambda m: m.value == 'episode'):
            return matches.named('episode_title')

    def then(self, matches, when_response, context):
        for match in when_response:
            matches.remove(match)
            match.name = 'alternative_title'
            matches.append(match)


class Filepart3EpisodeTitle(Rule):
    """
    If we have at least 3 fileparts structured like this:

    Serie name/SO1/E01-episode_title.mkv
    AAAAAAAAAA/BBB/CCCCCCCCCCCCCCCCCCCC

    Serie name/SO1/episode_title-E01.mkv
    AAAAAAAAAA/BBB/CCCCCCCCCCCCCCCCCCCC

    If CCCC contains the episode and BBB contains the season number,
    then the title is to be found in AAAA.
    """
    consequence = AppendMatch('title')

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        if matches.tagged('filepart-title'):
            return

        fileparts = matches.markers.named('path')
        if len(fileparts) < 3:
            return

        filename = fileparts[-1]
        directory = fileparts[-2]
        subdirectory = fileparts[-3]

        episode_number = matches.range(filename.start, filename.end, lambda match: match.name == 'episode', 0)
        if episode_number:
            season = matches.range(directory.start, directory.end, lambda match: match.name == 'season', 0)

            if season:
                hole = matches.holes(subdirectory.start, subdirectory.end,
                                     ignore=lambda match: 'weak-episode' in match.tags,
                                     formatter=cleanup, seps=title_seps, predicate=lambda match: match.value,
                                     index=0)
                if hole:
                    return hole


class Filepart2EpisodeTitle(Rule):
    """
    If we have at least 2 fileparts structured like this:

    Serie name SO1/E01-episode_title.mkv
    AAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBB

    If BBBB contains the episode and AAA contains a hole followed by the season number,
    then the title is to be found in AAAA.

    or

    Serie name/SO1E01-episode_title.mkv
    AAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBB

    If BBBB contains the season and episode and AAA contains a hole,
    then the title is to be found in AAAA.
    """
    consequence = AppendMatch('title')

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        if matches.tagged('filepart-title'):
            return

        fileparts = matches.markers.named('path')
        if len(fileparts) < 2:
            return

        filename = fileparts[-1]
        directory = fileparts[-2]

        episode_number = matches.range(filename.start, filename.end, lambda match: match.name == 'episode', 0)
        if episode_number:
            season = (matches.range(directory.start, directory.end, lambda match: match.name == 'season', 0) or
                      matches.range(filename.start, filename.end, lambda match: match.name == 'season', 0))
            if season:
                hole = matches.holes(directory.start, directory.end, ignore=lambda match: 'weak-episode' in match.tags,
                                     formatter=cleanup, seps=title_seps,
                                     predicate=lambda match: match.value, index=0)
                if hole:
                    hole.tags.append('filepart-title')
                    return hole
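
Aside: a hedged sketch of the three-filepart layout handled by Filepart3EpisodeTitle (episode in the filename, season in the directory, title taken from the directory above it); the exact result shape is an assumption:

from guessit import guessit

print(guessit('Game of Thrones/Season 01/E01-Winter is Coming.mkv'))
# expected to resemble {'title': 'Game of Thrones', 'season': 1, 'episode': 1,
#                       'episode_title': 'Winter is Coming', 'container': 'mkv'}
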
859
libs/common/guessit/rules/properties/episodes.py
Normal file
@@ -0,0 +1,859 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
episode, season, disc, episode_count, season_count and episode_details properties
"""
import copy
from collections import defaultdict

from rebulk import Rebulk, RemoveMatch, Rule, AppendMatch, RenameMatch
from rebulk.match import Match
from rebulk.remodule import re
from rebulk.utils import is_iterable

from .title import TitleFromPosition
from ..common import dash, alt_dash, seps, seps_no_fs
from ..common.formatters import strip
from ..common.numeral import numeral, parse_numeral
from ..common.pattern import is_disabled
from ..common.validators import compose, seps_surround, seps_before, int_coercable
from ...reutils import build_or_pattern


def episodes(config):
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # pylint: disable=too-many-branches,too-many-statements,too-many-locals
    def is_season_episode_disabled(context):
        """Whether season and episode rules should be enabled."""
        return is_disabled(context, 'episode') or is_disabled(context, 'season')

    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'])

    episode_max_range = config['episode_max_range']
    season_max_range = config['season_max_range']

    def episodes_season_chain_breaker(matches):
        """
        Break chains if there's more than 100 offset between two neighbor values.
        :param matches:
        :type matches:
        :return:
        :rtype:
        """
        eps = matches.named('episode')
        if len(eps) > 1 and abs(eps[-1].value - eps[-2].value) > episode_max_range:
            return True

        seasons = matches.named('season')
        if len(seasons) > 1 and abs(seasons[-1].value - seasons[-2].value) > season_max_range:
            return True
        return False

    rebulk.chain_defaults(chain_breaker=episodes_season_chain_breaker)

    def season_episode_conflict_solver(match, other):
        """
        Conflict solver for episode/season patterns

        :param match:
        :param other:
        :return:
        """
        if match.name != other.name:
            if match.name == 'episode' and other.name == 'year':
                return match
            if match.name in ('season', 'episode'):
                if other.name in ('video_codec', 'audio_codec', 'container', 'date'):
                    return match
                if (other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags
                        and not match.initiator.children.named(match.name + 'Marker')) or (
                            other.name == 'screen_size' and not int_coercable(other.raw)):
                    return match
                if other.name in ('season', 'episode') and match.initiator != other.initiator:
                    if (match.initiator.name in ('weak_episode', 'weak_duplicate')
                            and other.initiator.name in ('weak_episode', 'weak_duplicate')):
                        return '__default__'
                    for current in (match, other):
                        if 'weak-episode' in current.tags or 'x' in current.initiator.raw.lower():
                            return current
        return '__default__'

    season_words = config['season_words']
    episode_words = config['episode_words']
    of_words = config['of_words']
    all_words = config['all_words']
    season_markers = config['season_markers']
    season_ep_markers = config['season_ep_markers']
    disc_markers = config['disc_markers']
    episode_markers = config['episode_markers']
    range_separators = config['range_separators']
    weak_discrete_separators = list(sep for sep in seps_no_fs if sep not in range_separators)
    strong_discrete_separators = config['discrete_separators']
    discrete_separators = strong_discrete_separators + weak_discrete_separators

    max_range_gap = config['max_range_gap']

    def ordering_validator(match):
        """
        Validator for season list. They should be in natural order to be validated.

        episode/season separated by a weak discrete separator should be consecutive, unless a strong discrete separator
        or a range separator is present in the chain (1.3&5 is valid, but 1.3-5 is not valid and 1.3.5 is not valid)
        """
        values = match.children.to_dict()
        if 'season' in values and is_iterable(values['season']):
            # Season numbers must be in natural order to be validated.
            if not list(sorted(values['season'])) == values['season']:
                return False
        if 'episode' in values and is_iterable(values['episode']):
            # Episode numbers must be in natural order to be validated.
            if not list(sorted(values['episode'])) == values['episode']:
                return False

        def is_consecutive(property_name):
            """
            Check if the property season or episode has valid consecutive values.
            :param property_name:
            :type property_name:
            :return:
            :rtype:
            """
            previous_match = None
            valid = True
            for current_match in match.children.named(property_name):
                if previous_match:
                    match.children.previous(current_match,
                                            lambda m: m.name == property_name + 'Separator')
                    separator = match.children.previous(current_match,
                                                        lambda m: m.name == property_name + 'Separator', 0)
                    if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
                        if not 0 < current_match.value - previous_match.value <= max_range_gap + 1:
                            valid = False
                    if separator.raw in strong_discrete_separators:
                        valid = True
                        break
                previous_match = current_match
            return valid

        return is_consecutive('episode') and is_consecutive('season')

    # S01E02, 01x02, S01S02S03
    rebulk.chain(formatter={'season': int, 'episode': int},
                 tags=['SxxExx'],
                 abbreviations=[alt_dash],
                 children=True,
                 private_parent=True,
                 validate_all=True,
                 validator={'__parent__': ordering_validator},
                 conflict_solver=season_episode_conflict_solver,
                 disabled=is_season_episode_disabled) \
        .regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)@?' +
               build_or_pattern(episode_markers + disc_markers, name='episodeMarker') + r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}).repeater('+') \
        .regex(build_or_pattern(episode_markers + disc_markers + discrete_separators + range_separators,
                                name='episodeSeparator',
                                escape=True) +
               r'(?P<episode>\d+)').repeater('*') \
        .chain() \
        .regex(r'(?P<season>\d+)@?' +
               build_or_pattern(season_ep_markers, name='episodeMarker') +
               r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .chain() \
        .regex(r'(?P<season>\d+)@?' +
               build_or_pattern(season_ep_markers, name='episodeMarker') +
               r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
                                name='episodeSeparator',
                                escape=True) +
               r'(?P<episode>\d+)').repeater('*') \
        .chain() \
        .regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .regex(build_or_pattern(season_markers + discrete_separators + range_separators,
                                name='seasonSeparator',
                                escape=True) +
               r'(?P<season>\d+)').repeater('*')

    # episode_details property
    for episode_detail in ('Special', 'Pilot', 'Unaired', 'Final'):
        rebulk.string(episode_detail, value=episode_detail, name='episode_details',
                      disabled=lambda context: is_disabled(context, 'episode_details'))

    def validate_roman(match):
        """
        Validate a roman match if surrounded by separators
        :param match:
        :type match:
        :return:
        :rtype:
        """
        if int_coercable(match.raw):
            return True
        return seps_surround(match)

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
                    validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True,
                    conflict_solver=season_episode_conflict_solver)

    rebulk.chain(abbreviations=[alt_dash],
                 formatter={'season': parse_numeral, 'count': parse_numeral},
                 validator={'__parent__': compose(seps_surround, ordering_validator),
                            'season': validate_roman,
                            'count': validate_roman},
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'season')) \
        .defaults(validator=None) \
        .regex(build_or_pattern(season_words, name='seasonMarker') + '@?(?P<season>' + numeral + ')') \
        .regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
        .regex(r'@?' + build_or_pattern(range_separators + discrete_separators + ['@'],
                                        name='seasonSeparator', escape=True) +
               r'@?(?P<season>\d+)').repeater('*')

    rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>\d+)' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',  # Episode 4
                 abbreviations=[dash], formatter={'episode': int, 'version': int, 'count': int},
                 disabled=lambda context: context.get('type') == 'episode' or is_disabled(context, 'episode'))

    rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>' + numeral + ')' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',  # Episode 4
                 abbreviations=[dash],
                 validator={'episode': validate_roman},
                 formatter={'episode': parse_numeral, 'version': int, 'count': int},
                 disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode'))

    rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')',
                 tags=['SxxExx'],
                 abbreviations=[dash],
                 validator=None,
                 formatter={'season': int, 'other': lambda match: 'Complete'},
                 disabled=lambda context: is_disabled(context, 'season'))

    # 12, 13
    rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int},
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*')

    # 012, 013
    rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int},
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'0(?P<episode>\d{1,2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})').repeater('*')

    # 112, 113
    rebulk.chain(tags=['weak-episode'],
                 formatter={'episode': int, 'version': int},
                 name='weak_episode',
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d{3,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})').repeater('*')

    # 1, 2, 3
    rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int},
                 disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d)') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})').repeater('*')

    # e112, e113, 1e18, 3e19
    # TODO: Enhance rebulk for validator to be used globally (season_episode_validator)
    rebulk.chain(formatter={'season': int, 'episode': int, 'version': int},
                 disabled=lambda context: is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'(?P<season>\d{1,2})?(?P<episodeMarker>e)(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # ep 112, ep113, ep112, ep113
    rebulk.chain(abbreviations=[dash], formatter={'episode': int, 'version': int},
                 disabled=lambda context: is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'ep-?(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # cap 112, cap 112_114
    rebulk.chain(abbreviations=[dash],
                 tags=['see-pattern'],
                 formatter={'season': int, 'episode': int},
                 disabled=is_season_episode_disabled) \
        .defaults(validator=None) \
        .regex(r'(?P<seasonMarker>cap)-?(?P<season>\d{1,2})(?P<episode>\d{2})') \
        .regex(r'(?P<episodeSeparator>-)(?P<season>\d{1,2})(?P<episode>\d{2})').repeater('?')

    # 102, 0102
    rebulk.chain(tags=['weak-episode', 'weak-duplicate'],
                 formatter={'season': int, 'episode': int, 'version': int},
                 name='weak_duplicate',
                 conflict_solver=season_episode_conflict_solver,
                 disabled=lambda context: (context.get('episode_prefer_number', False) or
                                           context.get('type') == 'movie') or is_season_episode_disabled(context)) \
        .defaults(validator=None) \
        .regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})').repeater('*')

    rebulk.regex(r'v(?P<version>\d+)', children=True, private_parent=True, formatter=int,
                 disabled=lambda context: is_disabled(context, 'version'))

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    # TODO: List of words
    # detached of X count (season/episode)
    rebulk.regex(r'(?P<episode>\d+)-?' + build_or_pattern(of_words) +
                 r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
                 abbreviations=[dash], children=True, private_parent=True, formatter=int,
                 disabled=lambda context: is_disabled(context, 'episode'))

    rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode",
                 disabled=lambda context: is_disabled(context, 'episode_format'))

    rebulk.rules(WeakConflictSolver, RemoveInvalidSeason, RemoveInvalidEpisode,
                 SeePatternRange(range_separators + ['_']),
                 EpisodeNumberSeparatorRange(range_separators),
                 SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx,
                 RemoveWeakDuplicate, EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator,
                 RemoveWeak, RenameToAbsoluteEpisode, CountValidator, EpisodeSingleDigitValidator, RenameToDiscMatch)

    return rebulk

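Aside (between the builder and its rules): a hedged sketch of what the SxxExx chain plus the separator-range rules registered above are expected to produce:

from guessit import guessit

print(guessit('Show.S01E01-E03.mkv'))  # expected: season 1, episode [1, 2, 3] (range expanded)
print(guessit('Show.1x05.mkv'))        # expected: season 1, episode 5
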
class WeakConflictSolver(Rule):
    """
    Rule to decide whether weak-episode or weak-duplicate matches should be kept.

    If an anime is detected:
    - weak-duplicate matches should be removed
    - weak-episode matches should be tagged as anime
    Otherwise:
    - weak-episode matches are removed unless they're part of an episode range match.
    """
    priority = 128
    consequence = [RemoveMatch, AppendMatch]

    def enabled(self, context):
        return context.get('type') != 'movie'

    @classmethod
    def is_anime(cls, matches):
        """Return True if it seems to be an anime.

        Anime characteristics:
        - version, crc32 matches
        - screen_size inside brackets
        - release_group at start and inside brackets
        """
        if matches.named('version') or matches.named('crc32'):
            return True

        for group in matches.markers.named('group'):
            if matches.range(group.start, group.end, predicate=lambda m: m.name == 'screen_size'):
                return True
            if matches.markers.starting(group.start, predicate=lambda m: m.name == 'path'):
                hole = matches.holes(group.start, group.end, index=0)
                if hole and hole.raw == group.raw:
                    return True

        return False

    def when(self, matches, context):
        to_remove = []
        to_append = []
        anime_detected = self.is_anime(matches)
        for filepart in matches.markers.named('path'):
            weak_matches = matches.range(filepart.start, filepart.end, predicate=(
                lambda m: m.initiator.name == 'weak_episode'))
            weak_dup_matches = matches.range(filepart.start, filepart.end, predicate=(
                lambda m: m.initiator.name == 'weak_duplicate'))
            if anime_detected:
                if weak_matches:
                    to_remove.extend(weak_dup_matches)
                    for match in matches.range(filepart.start, filepart.end, predicate=(
                            lambda m: m.name == 'episode' and m.initiator.name != 'weak_duplicate')):
                        episode = copy.copy(match)
                        episode.tags = episode.tags + ['anime']
                        to_append.append(episode)
                        to_remove.append(match)
            elif weak_dup_matches:
                episodes_in_range = matches.range(filepart.start, filepart.end, predicate=(
                    lambda m:
                    m.name == 'episode' and m.initiator.name == 'weak_episode'
                    and m.initiator.children.named('episodeSeparator')
                ))
                if not episodes_in_range and not matches.range(filepart.start, filepart.end,
                                                               predicate=lambda m: 'SxxExx' in m.tags):
                    to_remove.extend(weak_matches)
                else:
                    for match in episodes_in_range:
                        episode = copy.copy(match)
                        episode.tags = []
                        to_append.append(episode)
                        to_remove.append(match)

                if to_append:
                    to_remove.extend(weak_dup_matches)

        return to_remove, to_append


class CountValidator(Rule):
    """
    Validate count property and rename it
    """
    priority = 64
    consequence = [RemoveMatch, RenameMatch('episode_count'), RenameMatch('season_count')]

    properties = {'episode_count': [None], 'season_count': [None]}

    def when(self, matches, context):
        to_remove = []
        episode_count = []
        season_count = []

        for count in matches.named('count'):
            previous = matches.previous(count, lambda match: match.name in ['episode', 'season'], 0)
            if previous:
                if previous.name == 'episode':
                    episode_count.append(count)
                elif previous.name == 'season':
                    season_count.append(count)
            else:
                to_remove.append(count)
        return to_remove, episode_count, season_count


class SeePatternRange(Rule):
    """
    Create matches for episode range for SEE pattern. E.g.: Cap.102_104
    """
    priority = 128
    consequence = [RemoveMatch, AppendMatch]

    def __init__(self, range_separators):
        super(SeePatternRange, self).__init__()
        self.range_separators = range_separators

    def when(self, matches, context):
        to_remove = []
        to_append = []

        for separator in matches.tagged('see-pattern', lambda m: m.name == 'episodeSeparator'):
            previous_match = matches.previous(separator, lambda m: m.name == 'episode' and 'see-pattern' in m.tags, 0)
            next_match = matches.next(separator, lambda m: m.name == 'season' and 'see-pattern' in m.tags, 0)
            if not next_match:
                continue

            next_match = matches.next(next_match, lambda m: m.name == 'episode' and 'see-pattern' in m.tags, 0)
            if previous_match and next_match and separator.value in self.range_separators:
                to_remove.append(next_match)

                for episode_number in range(previous_match.value + 1, next_match.value + 1):
                    match = copy.copy(next_match)
                    match.value = episode_number
                    to_append.append(match)

                to_remove.append(separator)

        return to_remove, to_append


class AbstractSeparatorRange(Rule):
    """
    Remove separator matches and create matches for season range.
    """
    priority = 128
    consequence = [RemoveMatch, AppendMatch]

    def __init__(self, range_separators, property_name):
        super(AbstractSeparatorRange, self).__init__()
        self.range_separators = range_separators
        self.property_name = property_name

    def when(self, matches, context):
        to_remove = []
        to_append = []

        for separator in matches.named(self.property_name + 'Separator'):
            previous_match = matches.previous(separator, lambda m: m.name == self.property_name, 0)
            next_match = matches.next(separator, lambda m: m.name == self.property_name, 0)
            initiator = separator.initiator

            if previous_match and next_match and separator.value in self.range_separators:
                to_remove.append(next_match)
                for episode_number in range(previous_match.value + 1, next_match.value):
                    match = copy.copy(next_match)
                    match.value = episode_number
                    initiator.children.append(match)
                    to_append.append(match)
                to_append.append(next_match)
            to_remove.append(separator)

        previous_match = None
        for next_match in matches.named(self.property_name):
            if previous_match:
                separator = matches.input_string[previous_match.initiator.end:next_match.initiator.start]
                if separator not in self.range_separators:
                    separator = strip(separator)
                if separator in self.range_separators:
                    initiator = previous_match.initiator
                    for episode_number in range(previous_match.value + 1, next_match.value):
                        match = copy.copy(next_match)
                        match.value = episode_number
                        initiator.children.append(match)
                        to_append.append(match)
                    to_append.append(Match(previous_match.end, next_match.start - 1,
                                           name=self.property_name + 'Separator',
                                           private=True,
                                           input_string=matches.input_string))
                to_remove.append(next_match)  # Remove and append match to support proper ordering
                to_append.append(next_match)

            previous_match = next_match

        return to_remove, to_append

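Aside: the expansion loop in AbstractSeparatorRange.when is the piece that turns 'E01-E03' into three episode matches. A standalone sketch with a hypothetical Match stand-in (FakeMatch is not part of guessit):

import copy

class FakeMatch(object):
    def __init__(self, value):
        self.value = value

previous_match, next_match = FakeMatch(1), FakeMatch(3)
expanded = []
for episode_number in range(previous_match.value + 1, next_match.value):
    match = copy.copy(next_match)
    match.value = episode_number
    expanded.append(match)
expanded.append(next_match)
print([m.value for m in expanded])  # [2, 3]: the intermediate values plus the range end
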
class RenameToAbsoluteEpisode(Rule):
|
||||
"""
|
||||
Rename episode to absolute_episodes.
|
||||
|
||||
Absolute episodes are only used if two groups of episodes are detected:
|
||||
S02E04-06 25-27
|
||||
25-27 S02E04-06
|
||||
2x04-06 25-27
|
||||
28. Anime Name S02E05
|
||||
The matches in the group with higher episode values are renamed to absolute_episode.
|
||||
"""
|
||||
|
||||
consequence = RenameMatch('absolute_episode')
|
||||
|
||||
def when(self, matches, context): # pylint:disable=inconsistent-return-statements
|
||||
initiators = {match.initiator for match in matches.named('episode')
|
||||
if len(match.initiator.children.named('episode')) > 1}
|
||||
if len(initiators) != 2:
|
||||
ret = []
|
||||
for filepart in matches.markers.named('path'):
|
||||
if matches.range(filepart.start + 1, filepart.end, predicate=lambda m: m.name == 'episode'):
|
||||
ret.extend(
|
||||
matches.starting(filepart.start, predicate=lambda m: m.initiator.name == 'weak_episode'))
|
||||
return ret
|
||||
|
||||
initiators = sorted(initiators, key=lambda item: item.end)
|
||||
if not matches.holes(initiators[0].end, initiators[1].start, predicate=lambda m: m.raw.strip(seps)):
|
||||
first_range = matches.named('episode', predicate=lambda m: m.initiator == initiators[0])
|
||||
second_range = matches.named('episode', predicate=lambda m: m.initiator == initiators[1])
|
||||
if len(first_range) == len(second_range):
|
||||
if second_range[0].value > first_range[0].value:
|
||||
return second_range
|
||||
if first_range[0].value > second_range[0].value:
|
||||
return first_range
|
||||
|
||||
|
||||
class EpisodeNumberSeparatorRange(AbstractSeparatorRange):
|
||||
"""
|
||||
Remove separator matches and create matches for episoderNumber range.
|
||||
"""
|
||||
|
||||
def __init__(self, range_separators):
|
||||
super(EpisodeNumberSeparatorRange, self).__init__(range_separators, "episode")
|
||||
|
||||
|
||||
class SeasonSeparatorRange(AbstractSeparatorRange):
|
||||
"""
|
||||
Remove separator matches and create matches for season range.
|
||||
"""
|
||||
|
||||
def __init__(self, range_separators):
|
||||
super(SeasonSeparatorRange, self).__init__(range_separators, "season")
|
||||
|
||||
|
||||
class RemoveWeakIfMovie(Rule):
|
||||
"""
|
||||
Remove weak-episode tagged matches if it seems to be a movie.
|
||||
"""
|
||||
priority = 64
|
||||
consequence = RemoveMatch
|
||||
|
||||
def enabled(self, context):
|
||||
return context.get('type') != 'episode'
|
||||
|
||||
def when(self, matches, context):
|
||||
to_remove = []
|
||||
to_ignore = set()
|
||||
remove = False
|
||||
for filepart in matches.markers.named('path'):
|
||||
year = matches.range(filepart.start, filepart.end, predicate=lambda m: m.name == 'year', index=0)
|
||||
if year:
|
||||
remove = True
|
||||
next_match = matches.range(year.end, filepart.end, predicate=lambda m: m.private, index=0)
|
||||
if (next_match and not matches.holes(year.end, next_match.start, predicate=lambda m: m.raw.strip(seps))
|
||||
and not matches.at_match(next_match, predicate=lambda m: m.name == 'year')):
|
||||
to_ignore.add(next_match.initiator)
|
||||
|
||||
to_ignore.update(matches.range(filepart.start, filepart.end,
|
||||
predicate=lambda m: len(m.children.named('episode')) > 1))
|
||||
|
||||
to_remove.extend(matches.conflicting(year))
|
||||
if remove:
|
||||
to_remove.extend(matches.tagged('weak-episode', predicate=(
|
||||
lambda m: m.initiator not in to_ignore and 'anime' not in m.tags)))
|
||||
|
||||
return to_remove
|
||||
|
||||
|
||||
class RemoveWeak(Rule):
|
||||
"""
|
||||
Remove weak-episode matches which appears after video, source, and audio matches.
|
||||
"""
|
||||
priority = 16
|
||||
consequence = RemoveMatch
|
||||
|
||||
def when(self, matches, context):
|
||||
to_remove = []
|
||||
for filepart in matches.markers.named('path'):
|
||||
weaks = matches.range(filepart.start, filepart.end, predicate=lambda m: 'weak-episode' in m.tags)
|
||||
if weaks:
|
||||
previous = matches.previous(weaks[0], predicate=lambda m: m.name in (
|
||||
'audio_codec', 'screen_size', 'streaming_service', 'source', 'video_profile',
|
||||
'audio_channels', 'audio_profile'), index=0)
|
||||
if previous and not matches.holes(
|
||||
previous.end, weaks[0].start, predicate=lambda m: m.raw.strip(seps)):
|
||||
to_remove.extend(weaks)
|
||||
return to_remove
|
||||
|
||||
|
||||
class RemoveWeakIfSxxExx(Rule):
|
||||
"""
|
||||
Remove weak-episode tagged matches if SxxExx pattern is matched.
|
||||
|
||||
Weak episodes at beginning of filepart are kept.
|
||||
"""
|
||||
priority = 64
|
||||
consequence = RemoveMatch
|
||||
|
||||
def when(self, matches, context):
|
||||
to_remove = []
|
||||
for filepart in matches.markers.named('path'):
|
||||
if matches.range(filepart.start, filepart.end,
|
||||
predicate=lambda m: not m.private and 'SxxExx' in m.tags):
|
||||
for match in matches.range(filepart.start, filepart.end, predicate=lambda m: 'weak-episode' in m.tags):
|
||||
if match.start != filepart.start or match.initiator.name != 'weak_episode':
|
||||
to_remove.append(match)
|
||||
return to_remove
|
||||
|
||||
|
||||
class RemoveInvalidSeason(Rule):
|
||||
"""
|
||||
Remove invalid season matches.
|
||||
"""
|
||||
priority = 64
|
||||
consequence = RemoveMatch
|
||||
|
||||
def when(self, matches, context):
|
||||
to_remove = []
|
||||
for filepart in matches.markers.named('path'):
|
||||
strong_season = matches.range(filepart.start, filepart.end, index=0,
|
||||
predicate=lambda m: m.name == 'season'
|
||||
and not m.private and 'SxxExx' in m.tags)
|
||||
if strong_season:
|
||||
if strong_season.initiator.children.named('episode'):
|
||||
for season in matches.range(strong_season.end, filepart.end,
|
||||
predicate=lambda m: m.name == 'season' and not m.private):
|
||||
# remove weak season or seasons without episode matches
|
||||
if 'SxxExx' not in season.tags or not season.initiator.children.named('episode'):
|
||||
if season.initiator:
|
||||
to_remove.append(season.initiator)
|
||||
to_remove.extend(season.initiator.children)
|
||||
else:
|
||||
to_remove.append(season)
|
||||
|
||||
return to_remove
|
||||
|
||||
|
||||
class RemoveInvalidEpisode(Rule):
|
||||
"""
|
||||
Remove invalid episode matches.
|
||||
"""
|
||||
priority = 64
|
||||
consequence = RemoveMatch
|
||||
|
||||
def when(self, matches, context):
|
||||
to_remove = []
|
||||
for filepart in matches.markers.named('path'):
|
||||
strong_episode = matches.range(filepart.start, filepart.end, index=0,
|
||||
predicate=lambda m: m.name == 'episode'
|
||||
and not m.private and 'SxxExx' in m.tags)
|
||||
if strong_episode:
|
||||
strong_ep_marker = RemoveInvalidEpisode.get_episode_prefix(matches, strong_episode)
|
||||
for episode in matches.range(strong_episode.end, filepart.end,
|
||||
predicate=lambda m: m.name == 'episode' and not m.private):
|
||||
ep_marker = RemoveInvalidEpisode.get_episode_prefix(matches, episode)
|
||||
if strong_ep_marker and ep_marker and strong_ep_marker.value.lower() != ep_marker.value.lower():
|
||||
if episode.initiator:
|
||||
to_remove.append(episode.initiator)
|
||||
to_remove.extend(episode.initiator.children)
|
||||
else:
|
||||
to_remove.append(ep_marker)
|
||||
to_remove.append(episode)
|
||||
|
||||
return to_remove
|
||||
|
||||
@staticmethod
|
||||
def get_episode_prefix(matches, episode):
|
||||
"""
|
||||
Return episode prefix: episodeMarker or episodeSeparator
|
||||
"""
|
||||
return matches.previous(episode, index=0,
|
||||
predicate=lambda m: m.name in ('episodeMarker', 'episodeSeparator'))
|
||||
|
||||
|
||||
class RemoveWeakDuplicate(Rule):
|
||||
"""
|
||||
Remove weak-duplicate tagged matches if duplicate patterns, for example The 100.109
|
||||
"""
|
||||
priority = 64
|
||||
consequence = RemoveMatch
|
||||
|
||||
def when(self, matches, context):
|
||||
to_remove = []
|
||||
for filepart in matches.markers.named('path'):
|
||||
patterns = defaultdict(list)
|
||||
for match in reversed(matches.range(filepart.start, filepart.end,
|
||||
predicate=lambda m: 'weak-duplicate' in m.tags)):
|
||||
if match.pattern in patterns[match.name]:
|
||||
to_remove.append(match)
|
||||
else:
|
||||
patterns[match.name].append(match.pattern)
|
||||
return to_remove
|
||||
|
||||
|
||||
class EpisodeDetailValidator(Rule):
|
||||
"""
|
||||
Validate episode_details if they are detached or next to season or episode.
|
||||
"""
|
||||
priority = 64
|
||||
consequence = RemoveMatch
|
||||
|
||||
def when(self, matches, context):
|
||||
ret = []
|
||||
for detail in matches.named('episode_details'):
|
||||
if not seps_surround(detail) \
|
||||
and not matches.previous(detail, lambda match: match.name in ['season', 'episode']) \
|
||||
and not matches.next(detail, lambda match: match.name in ['season', 'episode']):
|
||||
ret.append(detail)
|
||||
return ret
|
||||
|
||||
|
||||
class RemoveDetachedEpisodeNumber(Rule):
|
||||
"""
|
||||
If multiple episode are found, remove those that are not detached from a range and less than 10.
|
||||
|
||||
Fairy Tail 2 - 16-20, 2 should be removed.
|
||||
"""
|
||||
    priority = 64
    consequence = RemoveMatch
    dependency = [RemoveWeakIfSxxExx, RemoveWeakDuplicate]

    def when(self, matches, context):
        ret = []

        episode_numbers = []
        episode_values = set()
        for match in matches.named('episode', lambda m: not m.private and 'weak-episode' in m.tags):
            if match.value not in episode_values:
                episode_numbers.append(match)
                episode_values.add(match.value)

        episode_numbers = list(sorted(episode_numbers, key=lambda m: m.value))
        if len(episode_numbers) > 1 and \
                episode_numbers[0].value < 10 and \
                episode_numbers[1].value - episode_numbers[0].value != 1:
            parent = episode_numbers[0]
            while parent:  # TODO: Add a feature in rebulk to avoid this ...
                ret.append(parent)
                parent = parent.parent
        return ret


class VersionValidator(Rule):
    """
    Validate version if previous match is episode or if surrounded by separators.
    """
    priority = 64
    dependency = [RemoveWeakIfMovie, RemoveWeakIfSxxExx]
    consequence = RemoveMatch

    def when(self, matches, context):
        ret = []
        for version in matches.named('version'):
            episode_number = matches.previous(version, lambda match: match.name == 'episode', 0)
            if not episode_number and not seps_surround(version.initiator):
                ret.append(version)
        return ret


class EpisodeSingleDigitValidator(Rule):
    """
    Remove single digit episode when inside a group that doesn't own title.
    """
    dependency = [TitleFromPosition]

    consequence = RemoveMatch

    def when(self, matches, context):
        ret = []
        for episode in matches.named('episode', lambda match: len(match.initiator) == 1):
            group = matches.markers.at_match(episode, lambda marker: marker.name == 'group', index=0)
            if group:
                if not matches.range(*group.span, predicate=lambda match: match.name == 'title'):
                    ret.append(episode)
        return ret


class RenameToDiscMatch(Rule):
    """
    Rename episodes detected with `d` episodeMarkers to `disc`.
    """

    consequence = [RenameMatch('disc'), RenameMatch('discMarker'), RemoveMatch]

    def when(self, matches, context):
        discs = []
        markers = []
        to_remove = []

        disc_disabled = is_disabled(context, 'disc')

        for marker in matches.named('episodeMarker', predicate=lambda m: m.value.lower() == 'd'):
            if disc_disabled:
                to_remove.append(marker)
                to_remove.extend(marker.initiator.children)
                continue

            markers.append(marker)
            discs.extend(sorted(marker.initiator.children.named('episode'), key=lambda m: m.value))

        return discs, markers, to_remove
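# Illustrative sketch (not part of the upstream guessit source). RemoveWeakDuplicate
# above boils down to keeping only the first occurrence of each (match name,
# pattern) pair, scanning right to left. The same bookkeeping in plain Python,
# with hypothetical (name, pattern) tuples standing in for rebulk matches:
#
#     from collections import defaultdict
#     candidates = [('episode', r'\d{2,3}'), ('episode', r'\d{2,3}'), ('season', r'\d{1,2}')]
#     patterns = defaultdict(list)
#     kept, removed = [], []
#     for name, pattern in reversed(candidates):
#         if pattern in patterns[name]:
#             removed.append((name, pattern))
#         else:
#             patterns[name].append(pattern)
#             kept.append((name, pattern))
#     # removed now holds the weak duplicate, i.e. the second '\d{2,3}' episode.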
48
libs/common/guessit/rules/properties/film.py
Normal file
@ -0,0 +1,48 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
film property
"""
from rebulk import Rebulk, AppendMatch, Rule
from rebulk.remodule import re

from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import seps_surround


def film(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, validate_all=True, validator={'__parent__': seps_surround})

    rebulk.regex(r'f(\d{1,2})', name='film', private_parent=True, children=True, formatter=int,
                 disabled=lambda context: is_disabled(context, 'film'))

    rebulk.rules(FilmTitleRule)

    return rebulk


class FilmTitleRule(Rule):
    """
    Rule to find out film_title (the hole preceding the film property).
    """
    consequence = AppendMatch

    properties = {'film_title': [None]}

    def enabled(self, context):
        return not is_disabled(context, 'film_title')

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        bonus_number = matches.named('film', lambda match: not match.private, index=0)
        if bonus_number:
            filepath = matches.markers.at_match(bonus_number, lambda marker: marker.name == 'path', 0)
            hole = matches.holes(filepath.start, bonus_number.start + 1, formatter=cleanup, index=0)
            if hole and hole.value:
                hole.name = 'film_title'
                return hole
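# Illustrative sketch (not part of the upstream guessit source). The core of
# the property is the case-insensitive pattern r'f(\d{1,2})'; the captured
# digits become the film number via formatter=int. A minimal standalone check
# of that pattern (the real rule additionally requires separators around the
# parent match via seps_surround):
#
#     import re
#     match = re.search(r'f(?P<film>\d{1,2})', 'Movie.F13.Some.Title.mkv', re.IGNORECASE)
#     assert match and int(match.group('film')) == 13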
503
libs/common/guessit/rules/properties/language.py
Normal file
@ -0,0 +1,503 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
language and subtitle_language properties
"""
# pylint: disable=no-member
import copy
from collections import defaultdict, namedtuple

import babelfish
from rebulk import Rebulk, Rule, RemoveMatch, RenameMatch
from rebulk.remodule import re

from ..common import seps
from ..common.pattern import is_disabled
from ..common.words import iter_words
from ..common.validators import seps_surround


def language(config, common_words):
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :param common_words: common words
    :type common_words: set
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    subtitle_both = config['subtitle_affixes']
    subtitle_prefixes = sorted(subtitle_both + config['subtitle_prefixes'], key=length_comparator)
    subtitle_suffixes = sorted(subtitle_both + config['subtitle_suffixes'], key=length_comparator)
    lang_both = config['language_affixes']
    lang_prefixes = sorted(lang_both + config['language_prefixes'], key=length_comparator)
    lang_suffixes = sorted(lang_both + config['language_suffixes'], key=length_comparator)
    weak_affixes = frozenset(config['weak_affixes'])

    rebulk = Rebulk(disabled=lambda context: (is_disabled(context, 'language') and
                                              is_disabled(context, 'subtitle_language')))

    rebulk.string(*subtitle_prefixes, name="subtitle_language.prefix", ignore_case=True, private=True,
                  validator=seps_surround, tags=['release-group-prefix'],
                  disabled=lambda context: is_disabled(context, 'subtitle_language'))
    rebulk.string(*subtitle_suffixes, name="subtitle_language.suffix", ignore_case=True, private=True,
                  validator=seps_surround,
                  disabled=lambda context: is_disabled(context, 'subtitle_language'))
    rebulk.string(*lang_suffixes, name="language.suffix", ignore_case=True, private=True,
                  validator=seps_surround, tags=['source-suffix'],
                  disabled=lambda context: is_disabled(context, 'language'))

    def find_languages(string, context=None):
        """Find languages in the string.

        :return: list of tuple (property, Language, lang_word, word)
        """
        return LanguageFinder(context, subtitle_prefixes, subtitle_suffixes,
                              lang_prefixes, lang_suffixes, weak_affixes).find(string)

    rebulk.functional(find_languages,
                      properties={'language': [None]},
                      disabled=lambda context: not context.get('allowed_languages'))
    rebulk.rules(SubtitleExtensionRule,
                 SubtitlePrefixLanguageRule,
                 SubtitleSuffixLanguageRule,
                 RemoveLanguage,
                 RemoveInvalidLanguages(common_words))

    babelfish.language_converters['guessit'] = GuessitConverter(config['synonyms'])

    return rebulk


UNDETERMINED = babelfish.Language('und')


class GuessitConverter(babelfish.LanguageReverseConverter):  # pylint: disable=missing-docstring
    _with_country_regexp = re.compile(r'(.*)\((.*)\)')
    _with_country_regexp2 = re.compile(r'(.*)-(.*)')

    def __init__(self, synonyms):
        self.guessit_exceptions = {}
        for code, synlist in synonyms.items():
            if '_' in code:
                (alpha3, country) = code.split('_')
            else:
                (alpha3, country) = (code, None)
            for syn in synlist:
                self.guessit_exceptions[syn.lower()] = (alpha3, country, None)

    @property
    def codes(self):  # pylint: disable=missing-docstring
        return (babelfish.language_converters['alpha3b'].codes |
                babelfish.language_converters['alpha2'].codes |
                babelfish.language_converters['name'].codes |
                babelfish.language_converters['opensubtitles'].codes |
                babelfish.country_converters['name'].codes |
                frozenset(self.guessit_exceptions.keys()))

    def convert(self, alpha3, country=None, script=None):
        return str(babelfish.Language(alpha3, country, script))

    def reverse(self, name):  # pylint:disable=arguments-differ
        name = name.lower()
        # exceptions come first, as they need to override a potential match
        # with any of the other guessers
        try:
            return self.guessit_exceptions[name]
        except KeyError:
            pass

        for conv in [babelfish.Language,
                     babelfish.Language.fromalpha3b,
                     babelfish.Language.fromalpha2,
                     babelfish.Language.fromname,
                     babelfish.Language.fromopensubtitles,
                     babelfish.Language.fromietf]:
            try:
                reverse = conv(name)
                return reverse.alpha3, reverse.country, reverse.script
            except (ValueError, babelfish.LanguageReverseError):
                pass

        raise babelfish.LanguageReverseError(name)
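# Illustrative note (not part of the upstream guessit source). reverse()
# resolves a raw token to an (alpha3, country, script) tuple, trying the
# configured synonyms first and then each babelfish parser in order, e.g.:
#
#     >>> babelfish.Language.fromguessit('fr')       # resolved via fromalpha2
#     <Language [fr]>
#     >>> babelfish.Language.fromguessit('french')   # resolved via fromname
#     <Language [fr]>
#
# (fromguessit becomes available once this converter is registered under the
# 'guessit' key in babelfish.language_converters, as done in language() above.)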


def length_comparator(value):
    """
    Return value length.
    """
    return len(value)
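# Illustrative note (not part of the upstream guessit source). The affix lists
# built in language() above are sorted ascending by length with this key, e.g.:
#
#     >>> sorted(['subs', 'st', 'sub'], key=length_comparator)
#     ['st', 'sub', 'subs']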


_LanguageMatch = namedtuple('_LanguageMatch', ['property_name', 'word', 'lang'])


class LanguageWord(object):
    """
    Extension to the Word namedtuple in order to create compound words.

    E.g.: pt-BR, soft subtitles, custom subs
    """

    def __init__(self, start, end, value, input_string, next_word=None):
        self.start = start
        self.end = end
        self.value = value
        self.input_string = input_string
        self.next_word = next_word

    @property
    def extended_word(self):  # pylint:disable=inconsistent-return-statements
        """
        Return the extended word for this instance, if any.
        """
        if self.next_word:
            separator = self.input_string[self.end:self.next_word.start]
            next_separator = self.input_string[self.next_word.end:self.next_word.end + 1]

            if (separator == '-' and separator != next_separator) or separator in (' ', '.'):
                value = self.input_string[self.start:self.next_word.end].replace('.', ' ')

                return LanguageWord(self.start, self.next_word.end, value, self.input_string, self.next_word.next_word)

    def __repr__(self):
        return '<({start},{end}): {value}>'.format(start=self.start, end=self.end, value=self.value)


def to_rebulk_match(language_match):
    """
    Convert language match to rebulk Match: start, end, dict
    """
    word = language_match.word
    start = word.start
    end = word.end
    name = language_match.property_name
    if language_match.lang == UNDETERMINED:
        return start, end, {
            'name': name,
            'value': word.value.lower(),
            'formatter': babelfish.Language,
            'tags': ['weak-language']
        }

    return start, end, {
        'name': name,
        'value': language_match.lang
    }
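# Illustrative note (not part of the upstream guessit source). For an
# undetermined word, the tuple produced here keeps the lowercase raw token,
# defers conversion to the formatter and tags the match as weak, e.g.
# (offsets made up for illustration):
#
#     (10, 13, {'name': 'language', 'value': 'und',
#               'formatter': babelfish.Language, 'tags': ['weak-language']})
#
# whereas a determined language carries the babelfish.Language object
# directly in 'value'.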


class LanguageFinder(object):
    """
    Helper class to search and return language matches: 'language' and 'subtitle_language' properties
    """

    def __init__(self, context,
                 subtitle_prefixes, subtitle_suffixes,
                 lang_prefixes, lang_suffixes, weak_affixes):
        allowed_languages = context.get('allowed_languages') if context else None
        self.allowed_languages = {l.lower() for l in allowed_languages or []}
        self.weak_affixes = weak_affixes
        self.prefixes_map = {}
        self.suffixes_map = {}

        if not is_disabled(context, 'subtitle_language'):
            self.prefixes_map['subtitle_language'] = subtitle_prefixes
            self.suffixes_map['subtitle_language'] = subtitle_suffixes

        self.prefixes_map['language'] = lang_prefixes
        self.suffixes_map['language'] = lang_suffixes

    def find(self, string):
        """
        Return all matches for language and subtitle_language.

        Undetermined language matches are removed if a regular language is found.
        Multi language matches are removed if there are only undetermined language matches.
        """
        regular_lang_map = defaultdict(set)
        undetermined_map = defaultdict(set)
        multi_map = defaultdict(set)

        for match in self.iter_language_matches(string):
            key = match.property_name
            if match.lang == UNDETERMINED:
                undetermined_map[key].add(match)
            elif match.lang == 'mul':
                multi_map[key].add(match)
            else:
                regular_lang_map[key].add(match)

        for key, values in multi_map.items():
            if key in regular_lang_map or key not in undetermined_map:
                for value in values:
                    yield to_rebulk_match(value)

        for key, values in undetermined_map.items():
            if key not in regular_lang_map:
                for value in values:
                    yield to_rebulk_match(value)

        for values in regular_lang_map.values():
            for value in values:
                yield to_rebulk_match(value)

    def iter_language_matches(self, string):
        """
        Return language matches for the given string.
        """
        candidates = []
        previous = None
        for word in iter_words(string):
            language_word = LanguageWord(start=word.span[0], end=word.span[1], value=word.value, input_string=string)
            if previous:
                previous.next_word = language_word
                candidates.append(previous)
            previous = language_word
        if previous:
            candidates.append(previous)

        for candidate in candidates:
            for match in self.iter_matches_for_candidate(candidate):
                yield match

    def iter_matches_for_candidate(self, language_word):
        """
        Return language matches for the given candidate word.
        """
        tuples = [
            (language_word, language_word.next_word,
             self.prefixes_map,
             lambda string, prefix: string.startswith(prefix),
             lambda string, prefix: string[len(prefix):]),
            (language_word.next_word, language_word,
             self.suffixes_map,
             lambda string, suffix: string.endswith(suffix),
             lambda string, suffix: string[:len(string) - len(suffix)])
        ]

        for word, fallback_word, affixes, is_affix, strip_affix in tuples:
            if not word:
                continue

            match = self.find_match_for_word(word, fallback_word, affixes, is_affix, strip_affix)
            if match:
                yield match

        match = self.find_language_match_for_word(language_word)
        if match:
            yield match

    def find_match_for_word(self, word, fallback_word, affixes, is_affix, strip_affix):  # pylint:disable=inconsistent-return-statements
        """
        Return the language match for the given word and affixes.
        """
        for current_word in (word.extended_word, word):
            if not current_word:
                continue

            word_lang = current_word.value.lower()

            for key, parts in affixes.items():
                for part in parts:
                    if not is_affix(word_lang, part):
                        continue

                    match = None
                    value = strip_affix(word_lang, part)
                    if not value:
                        if fallback_word and (
                                abs(fallback_word.start - word.end) <= 1 or abs(word.start - fallback_word.end) <= 1):
                            match = self.find_language_match_for_word(fallback_word, key=key)

                        if not match and part not in self.weak_affixes:
                            match = self.create_language_match(key, LanguageWord(current_word.start, current_word.end,
                                                                                 'und', current_word.input_string))
                    else:
                        match = self.create_language_match(key, LanguageWord(current_word.start, current_word.end,
                                                                             value, current_word.input_string))

                    if match:
                        return match

    def find_language_match_for_word(self, word, key='language'):  # pylint:disable=inconsistent-return-statements
        """
        Return the language match for the given word.
        """
        for current_word in (word.extended_word, word):
            if current_word:
                match = self.create_language_match(key, current_word)
                if match:
                    return match

    def create_language_match(self, key, word):  # pylint:disable=inconsistent-return-statements
        """
        Create a LanguageMatch for a given word
        """
        lang = self.parse_language(word.value.lower())

        if lang is not None:
            return _LanguageMatch(property_name=key, word=word, lang=lang)

    def parse_language(self, lang_word):  # pylint:disable=inconsistent-return-statements
        """
        Parse the lang_word into a valid Language.

        Multi and Undetermined languages are also valid languages.
        """
        try:
            lang = babelfish.Language.fromguessit(lang_word)
            if ((hasattr(lang, 'name') and lang.name.lower() in self.allowed_languages) or
                    (hasattr(lang, 'alpha2') and lang.alpha2.lower() in self.allowed_languages) or
                    lang.alpha3.lower() in self.allowed_languages):
                return lang

        except babelfish.Error:
            pass


class SubtitlePrefixLanguageRule(Rule):
    """
    Convert language guess as subtitle_language if previous match is a subtitle language prefix
    """
    consequence = RemoveMatch

    properties = {'subtitle_language': [None]}

    def enabled(self, context):
        return not is_disabled(context, 'subtitle_language')

    def when(self, matches, context):
        to_rename = []
        to_remove = matches.named('subtitle_language.prefix')
        for lang in matches.named('language'):
            prefix = matches.previous(lang, lambda match: match.name == 'subtitle_language.prefix', 0)
            if not prefix:
                group_marker = matches.markers.at_match(lang, lambda marker: marker.name == 'group', 0)
                if group_marker:
                    # Find prefix if placed just before the group
                    prefix = matches.previous(group_marker, lambda match: match.name == 'subtitle_language.prefix',
                                              0)
                    if not prefix:
                        # Find prefix if placed before in the group
                        prefix = matches.range(group_marker.start, lang.start,
                                               lambda match: match.name == 'subtitle_language.prefix', 0)
            if prefix:
                to_rename.append((prefix, lang))
                to_remove.extend(matches.conflicting(lang))
                if prefix in to_remove:
                    to_remove.remove(prefix)
        return to_rename, to_remove

    def then(self, matches, when_response, context):
        to_rename, to_remove = when_response
        super(SubtitlePrefixLanguageRule, self).then(matches, to_remove, context)
        for prefix, match in to_rename:
            # Remove suffix equivalent of prefix.
            suffix = copy.copy(prefix)
            suffix.name = 'subtitle_language.suffix'
            if suffix in matches:
                matches.remove(suffix)
            matches.remove(match)
            match.name = 'subtitle_language'
            matches.append(match)


class SubtitleSuffixLanguageRule(Rule):
    """
    Convert language guess as subtitle_language if next match is a subtitle language suffix
    """
    dependency = SubtitlePrefixLanguageRule
    consequence = RemoveMatch

    properties = {'subtitle_language': [None]}

    def enabled(self, context):
        return not is_disabled(context, 'subtitle_language')

    def when(self, matches, context):
        to_append = []
        to_remove = matches.named('subtitle_language.suffix')
        for lang in matches.named('language'):
            suffix = matches.next(lang, lambda match: match.name == 'subtitle_language.suffix', 0)
            if suffix:
                to_append.append(lang)
                if suffix in to_remove:
                    to_remove.remove(suffix)
        return to_append, to_remove

    def then(self, matches, when_response, context):
        to_rename, to_remove = when_response
        super(SubtitleSuffixLanguageRule, self).then(matches, to_remove, context)
        for match in to_rename:
            matches.remove(match)
            match.name = 'subtitle_language'
            matches.append(match)


class SubtitleExtensionRule(Rule):
    """
    Convert language guess as subtitle_language if next match is a subtitle extension.

    Since it's a strong match, it also removes any conflicting source with it.
    """
    consequence = [RemoveMatch, RenameMatch('subtitle_language')]

    properties = {'subtitle_language': [None]}

    def enabled(self, context):
        return not is_disabled(context, 'subtitle_language')

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        subtitle_extension = matches.named('container',
                                           lambda match: 'extension' in match.tags and 'subtitle' in match.tags,
                                           0)
        if subtitle_extension:
            subtitle_lang = matches.previous(subtitle_extension, lambda match: match.name == 'language', 0)
            if subtitle_lang:
                for weak in matches.named('subtitle_language', predicate=lambda m: 'weak-language' in m.tags):
                    weak.private = True

                return matches.conflicting(subtitle_lang, lambda m: m.name == 'source'), subtitle_lang


class RemoveLanguage(Rule):
    """Remove language matches that were not converted to subtitle_language when language is disabled."""

    consequence = RemoveMatch

    def enabled(self, context):
        return is_disabled(context, 'language')

    def when(self, matches, context):
        return matches.named('language')


class RemoveInvalidLanguages(Rule):
    """Remove language matches that match the blacklisted common words."""

    consequence = RemoveMatch

    def __init__(self, common_words):
        """Constructor."""
        super(RemoveInvalidLanguages, self).__init__()
        self.common_words = common_words

    def when(self, matches, context):
        to_remove = []
        for match in matches.range(0, len(matches.input_string),
                                   predicate=lambda m: m.name in ('language', 'subtitle_language')):
            if match.raw.lower() not in self.common_words:
                continue

            group = matches.markers.at_match(match, index=0, predicate=lambda m: m.name == 'group')
            if group and (
                    not matches.range(
                        group.start, group.end, predicate=lambda m: m.name not in ('language', 'subtitle_language')
                    ) and (not matches.holes(group.start, group.end, predicate=lambda m: m.value.strip(seps)))):
                continue

            to_remove.append(match)

        return to_remove
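# Illustrative sketch (not part of the upstream guessit source). The functional
# language finder registered above is disabled unless the context provides
# allowed_languages (guessit's bundled default options are expected to supply
# a list), so an explicit call would look like:
#
#     from guessit import guessit
#     guess = guessit('Show.S01E01.VOSTFR.720p.mkv',
#                     {'allowed_languages': ['fr', 'en']})
#     # guess.get('subtitle_language') is expected to hold a French
#     # babelfish.Language here, assuming 'vost' is a configured subtitle prefix.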
55
libs/common/guessit/rules/properties/mimetype.py
Normal file
@ -0,0 +1,55 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
mimetype property
"""
import mimetypes

from rebulk import Rebulk, CustomRule, POST_PROCESS
from rebulk.match import Match

from ..common.pattern import is_disabled
from ...rules.processors import Processors


def mimetype(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'mimetype'))
    rebulk.rules(Mimetype)

    return rebulk


class Mimetype(CustomRule):
    """
    Mimetype post processor
    :param matches:
    :type matches:
    :return:
    :rtype:
    """
    priority = POST_PROCESS

    dependency = Processors

    def when(self, matches, context):
        mime, _ = mimetypes.guess_type(matches.input_string, strict=False)
        return mime

    def then(self, matches, when_response, context):
        mime = when_response
        matches.append(Match(len(matches.input_string), len(matches.input_string), name='mimetype', value=mime))

    @property
    def properties(self):
        """
        Properties for this rule.
        """
        return {'mimetype': [None]}
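# Illustrative note (not part of the upstream guessit source). The rule simply
# defers to the standard library, appending a zero-length match at the end of
# the input when a type is known:
#
#     >>> import mimetypes
#     >>> mimetypes.guess_type('Movie.2008.1080p.mp4', strict=False)
#     ('video/mp4', None)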
356
libs/common/guessit/rules/properties/other.py
Normal file
@ -0,0 +1,356 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
other property
"""
import copy

from rebulk import Rebulk, Rule, RemoveMatch, RenameMatch, POST_PROCESS, AppendMatch
from rebulk.remodule import re

from ..common import dash
from ..common import seps
from ..common.pattern import is_disabled
from ..common.validators import seps_after, seps_before, seps_surround, compose
from ...reutils import build_or_pattern
from ...rules.common.formatters import raw_cleanup


def other(config):  # pylint:disable=unused-argument,too-many-statements
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'other'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
    rebulk.defaults(name="other", validator=seps_surround)

    rebulk.regex('Audio-?Fix', 'Audio-?Fixed', value='Audio Fixed')
    rebulk.regex('Sync-?Fix', 'Sync-?Fixed', value='Sync Fixed')
    rebulk.regex('Dual', 'Dual-?Audio', value='Dual Audio')
    rebulk.regex('ws', 'wide-?screen', value='Widescreen')
    rebulk.regex('Re-?Enc(?:oded)?', value='Reencoded')

    rebulk.string('Proper', 'Repack', 'Rerip', value='Proper',
                  tags=['streaming_service.prefix', 'streaming_service.suffix'])

    rebulk.regex('Real-Proper', 'Real-Repack', 'Real-Rerip', value='Proper',
                 tags=['streaming_service.prefix', 'streaming_service.suffix', 'real'])
    rebulk.string('Fix', 'Fixed', value='Fix', tags=['has-neighbor-before', 'has-neighbor-after',
                                                     'streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.string('Dirfix', 'Nfofix', 'Prooffix', value='Fix',
                  tags=['streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.regex('(?:Proof-?)?Sample-?Fix', value='Fix',
                 tags=['streaming_service.prefix', 'streaming_service.suffix'])

    rebulk.string('Fansub', value='Fan Subtitled', tags='has-neighbor')
    rebulk.string('Fastsub', value='Fast Subtitled', tags='has-neighbor')

    season_words = build_or_pattern(["seasons?", "series?"])
    complete_articles = build_or_pattern(["The"])

    def validate_complete(match):
        """
        Make sure a season word is defined.
        :param match:
        :type match:
        :return:
        :rtype:
        """
        children = match.children
        if not children.named('completeWordsBefore') and not children.named('completeWordsAfter'):
            return False
        return True

    rebulk.regex('(?P<completeArticle>' + complete_articles + '-)?' +
                 '(?P<completeWordsBefore>' + season_words + '-)?' +
                 'Complete' + '(?P<completeWordsAfter>-' + season_words + ')?',
                 private_names=['completeArticle', 'completeWordsBefore', 'completeWordsAfter'],
                 value={'other': 'Complete'},
                 tags=['release-group-prefix'],
                 validator={'__parent__': compose(seps_surround, validate_complete)})
    rebulk.string('R5', value='Region 5')
    rebulk.string('RC', value='Region C')
    rebulk.regex('Pre-?Air', value='Preair')
    rebulk.regex('(?:PS-?)?Vita', value='PS Vita')
    rebulk.regex('(HD)(?P<another>Rip)', value={'other': 'HD', 'another': 'Rip'},
                 private_parent=True, children=True, validator={'__parent__': seps_surround}, validate_all=True)

    for value in ('Screener', 'Remux', '3D', 'PAL', 'SECAM', 'NTSC', 'XXX'):
        rebulk.string(value, value=value)

    rebulk.string('HQ', value='High Quality', tags='uhdbluray-neighbor')
    rebulk.string('HR', value='High Resolution')
    rebulk.string('LD', value='Line Dubbed')
    rebulk.string('MD', value='Mic Dubbed')
    rebulk.string('mHD', 'HDLight', value='Micro HD')
    rebulk.string('LDTV', value='Low Definition')
    rebulk.string('HFR', value='High Frame Rate')
    rebulk.string('HD', value='HD', validator=None,
                  tags=['streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.regex('Full-?HD', 'FHD', value='Full HD', validator=None,
                 tags=['streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.regex('Ultra-?(?:HD)?', 'UHD', value='Ultra HD', validator=None,
                 tags=['streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.regex('Upscaled?', value='Upscaled')

    for value in ('Complete', 'Classic', 'Bonus', 'Trailer', 'Retail',
                  'Colorized', 'Internal'):
        rebulk.string(value, value=value, tags=['has-neighbor', 'release-group-prefix'])
    rebulk.regex('LiNE', value='Line Audio', tags=['has-neighbor-before', 'has-neighbor-after', 'release-group-prefix'])
    rebulk.regex('Read-?NFO', value='Read NFO')
    rebulk.string('CONVERT', value='Converted', tags='has-neighbor')
    rebulk.string('DOCU', 'DOKU', value='Documentary', tags='has-neighbor')
    rebulk.string('OM', value='Open Matte', tags='has-neighbor')
    rebulk.string('STV', value='Straight to Video', tags='has-neighbor')
    rebulk.string('OAR', value='Original Aspect Ratio', tags='has-neighbor')
    rebulk.string('Complet', value='Complete', tags=['has-neighbor', 'release-group-prefix'])

    for coast in ('East', 'West'):
        rebulk.regex(r'(?:Live-)?(?:Episode-)?' + coast + '-?(?:Coast-)?Feed', value=coast + ' Coast Feed')

    rebulk.string('VO', 'OV', value='Original Video', tags='has-neighbor')
    rebulk.string('Ova', 'Oav', value='Original Animated Video')

    rebulk.regex('Scr(?:eener)?', value='Screener', validator=None,
                 tags=['other.validate.screener', 'source-prefix', 'source-suffix'])
    rebulk.string('Mux', value='Mux', validator=seps_after,
                  tags=['other.validate.mux', 'video-codec-prefix', 'source-suffix'])
    rebulk.string('HC', 'vost', value='Hardcoded Subtitles')

    rebulk.string('SDR', value='Standard Dynamic Range', tags='uhdbluray-neighbor')
    rebulk.regex('HDR(?:10)?', value='HDR10', tags='uhdbluray-neighbor')
    rebulk.regex('Dolby-?Vision', value='Dolby Vision', tags='uhdbluray-neighbor')
    rebulk.regex('BT-?2020', value='BT.2020', tags='uhdbluray-neighbor')

    rebulk.string('Sample', value='Sample', tags=['at-end', 'not-a-release-group'])
    rebulk.string('Proof', value='Proof', tags=['at-end', 'not-a-release-group'])
    rebulk.string('Obfuscated', 'Scrambled', value='Obfuscated', tags=['at-end', 'not-a-release-group'])
    rebulk.string('xpost', 'postbot', 'asrequested', value='Repost', tags='not-a-release-group')

    rebulk.rules(RenameAnotherToOther, ValidateHasNeighbor, ValidateHasNeighborAfter, ValidateHasNeighborBefore,
                 ValidateScreenerRule, ValidateMuxRule, ValidateHardcodedSubs, ValidateStreamingServiceNeighbor,
                 ValidateAtEnd, ProperCountRule)

    return rebulk


class ProperCountRule(Rule):
    """
    Add proper_count property
    """
    priority = POST_PROCESS

    consequence = AppendMatch

    properties = {'proper_count': [None]}

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        propers = matches.named('other', lambda match: match.value == 'Proper')
        if propers:
            raws = {}  # Count distinct raw values
            for proper in propers:
                raws[raw_cleanup(proper.raw)] = proper
            proper_count_match = copy.copy(propers[-1])
            proper_count_match.name = 'proper_count'

            value = 0
            for raw in raws.values():
                value += 2 if 'real' in raw.tags else 1

            proper_count_match.value = value
            return proper_count_match
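# Illustrative note (not part of the upstream guessit source). proper_count
# sums the distinct Proper-like tokens, with REAL variants weighing double.
# E.g. for a name containing both 'REAL.PROPER' and 'REPACK' (a hypothetical
# match layout): 'REAL.PROPER' carries the 'real' tag and counts 2, 'REPACK'
# counts 1, so proper_count would be 3.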


class RenameAnotherToOther(Rule):
    """
    Rename `another` properties to `other`
    """
    priority = 32
    consequence = RenameMatch('other')

    def when(self, matches, context):
        return matches.named('another')


class ValidateHasNeighbor(Rule):
    """
    Validate tag has-neighbor
    """
    consequence = RemoveMatch
    priority = 64

    def when(self, matches, context):
        ret = []
        for to_check in matches.range(predicate=lambda match: 'has-neighbor' in match.tags):
            previous_match = matches.previous(to_check, index=0)
            previous_group = matches.markers.previous(to_check, lambda marker: marker.name == 'group', 0)
            if previous_group and (not previous_match or previous_group.end > previous_match.end):
                previous_match = previous_group
            if previous_match and not matches.input_string[previous_match.end:to_check.start].strip(seps):
                break
            next_match = matches.next(to_check, index=0)
            next_group = matches.markers.next(to_check, lambda marker: marker.name == 'group', 0)
            if next_group and (not next_match or next_group.start < next_match.start):
                next_match = next_group
            if next_match and not matches.input_string[to_check.end:next_match.start].strip(seps):
                break
            ret.append(to_check)
        return ret


class ValidateHasNeighborBefore(Rule):
    """
    Validate tag has-neighbor-before that previous match exists.
    """
    consequence = RemoveMatch
    priority = 64

    def when(self, matches, context):
        ret = []
        for to_check in matches.range(predicate=lambda match: 'has-neighbor-before' in match.tags):
            next_match = matches.next(to_check, index=0)
            next_group = matches.markers.next(to_check, lambda marker: marker.name == 'group', 0)
            if next_group and (not next_match or next_group.start < next_match.start):
                next_match = next_group
            if next_match and not matches.input_string[to_check.end:next_match.start].strip(seps):
                break
            ret.append(to_check)
        return ret


class ValidateHasNeighborAfter(Rule):
    """
    Validate tag has-neighbor-after that next match exists.
    """
    consequence = RemoveMatch
    priority = 64

    def when(self, matches, context):
        ret = []
        for to_check in matches.range(predicate=lambda match: 'has-neighbor-after' in match.tags):
            previous_match = matches.previous(to_check, index=0)
            previous_group = matches.markers.previous(to_check, lambda marker: marker.name == 'group', 0)
            if previous_group and (not previous_match or previous_group.end > previous_match.end):
                previous_match = previous_group
            if previous_match and not matches.input_string[previous_match.end:to_check.start].strip(seps):
                break
            ret.append(to_check)
        return ret


class ValidateScreenerRule(Rule):
    """
    Validate tag other.validate.screener
    """
    consequence = RemoveMatch
    priority = 64

    def when(self, matches, context):
        ret = []
        for screener in matches.named('other', lambda match: 'other.validate.screener' in match.tags):
            source_match = matches.previous(screener, lambda match: match.initiator.name == 'source', 0)
            if not source_match or matches.input_string[source_match.end:screener.start].strip(seps):
                ret.append(screener)
        return ret


class ValidateMuxRule(Rule):
    """
    Validate tag other.validate.mux
    """
    consequence = RemoveMatch
    priority = 64

    def when(self, matches, context):
        ret = []
        for mux in matches.named('other', lambda match: 'other.validate.mux' in match.tags):
            source_match = matches.previous(mux, lambda match: match.initiator.name == 'source', 0)
            if not source_match:
                ret.append(mux)
        return ret


class ValidateHardcodedSubs(Rule):
    """Validate HC matches."""

    priority = 32
    consequence = RemoveMatch

    def when(self, matches, context):
        to_remove = []
        for hc_match in matches.named('other', predicate=lambda match: match.value == 'Hardcoded Subtitles'):
            next_match = matches.next(hc_match, predicate=lambda match: match.name == 'subtitle_language', index=0)
            if next_match and not matches.holes(hc_match.end, next_match.start,
                                                predicate=lambda match: match.value.strip(seps)):
                continue

            previous_match = matches.previous(hc_match,
                                              predicate=lambda match: match.name == 'subtitle_language', index=0)
            if previous_match and not matches.holes(previous_match.end, hc_match.start,
                                                    predicate=lambda match: match.value.strip(seps)):
                continue

            to_remove.append(hc_match)

        return to_remove


class ValidateStreamingServiceNeighbor(Rule):
    """Validate streaming service's neighbors."""

    priority = 32
    consequence = RemoveMatch

    def when(self, matches, context):
        to_remove = []
        for match in matches.named('other',
                                   predicate=lambda m: (m.initiator.name != 'source'
                                                        and ('streaming_service.prefix' in m.tags
                                                             or 'streaming_service.suffix' in m.tags))):
            match = match.initiator
            if not seps_after(match):
                if 'streaming_service.prefix' in match.tags:
                    next_match = matches.next(match, lambda m: m.name == 'streaming_service', 0)
                    if next_match and not matches.holes(match.end, next_match.start,
                                                        predicate=lambda m: m.value.strip(seps)):
                        continue
                if match.children:
                    to_remove.extend(match.children)
                to_remove.append(match)

            elif not seps_before(match):
                if 'streaming_service.suffix' in match.tags:
                    previous_match = matches.previous(match, lambda m: m.name == 'streaming_service', 0)
                    if previous_match and not matches.holes(previous_match.end, match.start,
                                                            predicate=lambda m: m.value.strip(seps)):
                        continue

                if match.children:
                    to_remove.extend(match.children)
                to_remove.append(match)

        return to_remove


class ValidateAtEnd(Rule):
    """Validate other which should occur at the end of a filepart."""

    priority = 32
    consequence = RemoveMatch

    def when(self, matches, context):
        to_remove = []
        for filepart in matches.markers.named('path'):
            for match in matches.range(filepart.start, filepart.end,
                                       predicate=lambda m: m.name == 'other' and 'at-end' in m.tags):
                if (matches.holes(match.end, filepart.end, predicate=lambda m: m.value.strip(seps)) or
                        matches.range(match.end, filepart.end, predicate=lambda m: m.name not in (
                            'other', 'container'))):
                    to_remove.append(match)

        return to_remove
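# Illustrative sketch (not part of the upstream guessit source). The
# 'has-neighbor' family of rules all reduce to the same string test: the gap
# between a candidate and its neighbour must contain nothing but separator
# characters. A standalone version of that test, with a hypothetical SEPS set:
#
#     SEPS = ' .-_/\\'
#     def touches(input_string, left_end, right_start):
#         """True when only separators sit between the two offsets."""
#         return not input_string[left_end:right_start].strip(SEPS)
#
#     name = 'Show.S01E01.CONVERT.720p.mkv'
#     # 'S01E01' ends at offset 11 and 'CONVERT' starts at offset 12:
#     assert touches(name, 11, 12)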
46
libs/common/guessit/rules/properties/part.py
Normal file
@ -0,0 +1,46 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
part property
"""
from rebulk.remodule import re

from rebulk import Rebulk
from ..common import dash
from ..common.pattern import is_disabled
from ..common.validators import seps_surround, int_coercable, compose
from ..common.numeral import numeral, parse_numeral
from ...reutils import build_or_pattern


def part(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'part'))
    rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash], validator={'__parent__': seps_surround})

    prefixes = config['prefixes']

    def validate_roman(match):
        """
        Validate a roman match if surrounded by separators
        :param match:
        :type match:
        :return:
        :rtype:
        """
        if int_coercable(match.raw):
            return True
        return seps_surround(match)

    rebulk.regex(build_or_pattern(prefixes) + r'-?(?P<part>' + numeral + r')',
                 prefixes=prefixes, validate_all=True, private_parent=True, children=True, formatter=parse_numeral,
                 validator={'part': compose(validate_roman, lambda m: 0 < m.value < 100)})

    return rebulk
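# Illustrative sketch (not part of the upstream guessit source).
# build_or_pattern(prefixes) expands a config list such as ['Part', 'CD']
# (hypothetical values) into an alternation, and the '-' abbreviation expands
# to a separator class, so the final pattern behaves roughly like this
# simplified one (the real 'numeral' pattern also accepts roman and word
# numerals):
#
#     import re
#     pattern = re.compile(r'(?:Part|CD)(?:[-. _])?(?P<part>\d{1,2})', re.IGNORECASE)
#     match = pattern.search('Movie.Part.2.mkv')
#     assert match and int(match.group('part')) == 2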
331
libs/common/guessit/rules/properties/release_group.py
Normal file
@ -0,0 +1,331 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
release_group property
"""
import copy

from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch
from rebulk.match import Match

from ..common import seps
from ..common.expected import build_expected_function
from ..common.comparators import marker_sorted
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import int_coercable, seps_surround
from ..properties.title import TitleFromPosition


def release_group(config):
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    forbidden_groupnames = config['forbidden_names']

    groupname_ignore_seps = config['ignored_seps']
    groupname_seps = ''.join([c for c in seps if c not in groupname_ignore_seps])

    def clean_groupname(string):
        """
        Remove forbidden names and strip separators from the input string.
        :param string:
        :type string:
        :return:
        :rtype:
        """
        string = string.strip(groupname_seps)
        if not (string.endswith(tuple(groupname_ignore_seps)) and string.startswith(tuple(groupname_ignore_seps))) \
                and not any(i in string.strip(groupname_ignore_seps) for i in groupname_ignore_seps):
            string = string.strip(groupname_ignore_seps)
        for forbidden in forbidden_groupnames:
            if string.lower().startswith(forbidden) and string[len(forbidden):len(forbidden) + 1] in seps:
                string = string[len(forbidden):]
                string = string.strip(groupname_seps)
            if string.lower().endswith(forbidden) and string[-len(forbidden) - 1:-len(forbidden)] in seps:
                string = string[:len(string) - len(forbidden)]
                string = string.strip(groupname_seps)
        return string
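    # Illustrative note (not part of the upstream guessit source). With a
    # hypothetical forbidden name 'rip', the suffix branch above turns
    # 'GRP-rip' into 'GRP':
    #
    #     'GRP-rip'.lower().endswith('rip')        -> True
    #     'GRP-rip'[-4:-3] == '-'                  -> a separator, so
    #     'GRP-rip'[:len('GRP-rip') - len('rip')]  -> 'GRP-', stripped to 'GRP'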

    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'release_group'))

    expected_group = build_expected_function('expected_group')

    rebulk.functional(expected_group, name='release_group', tags=['expected'],
                      validator=seps_surround,
                      conflict_solver=lambda match, other: other,
                      disabled=lambda context: not context.get('expected_group'))

    return rebulk.rules(
        DashSeparatedReleaseGroup(clean_groupname),
        SceneReleaseGroup(clean_groupname),
        AnimeReleaseGroup
    )


_scene_previous_names = ('video_codec', 'source', 'video_api', 'audio_codec', 'audio_profile', 'video_profile',
                         'audio_channels', 'screen_size', 'other', 'container', 'language', 'subtitle_language',
                         'subtitle_language.suffix', 'subtitle_language.prefix', 'language.suffix')

_scene_previous_tags = ('release-group-prefix', )


class DashSeparatedReleaseGroup(Rule):
    """
    Detect dash separated release groups that might appear at the end or at the beginning of a release name.

    Series.S01E02.Pilot.DVDRip.x264-CS.mkv
        release_group: CS
    abc-the.title.name.1983.1080p.bluray.x264.mkv
        release_group: abc

    At the end: Release groups should be dash-separated and shouldn't contain spaces nor
    appear in a group with other matches. The preceding matches should be separated by dot.
    If a release group is found, the conflicting matches are removed.

    At the beginning: Release groups should be dash-separated and shouldn't contain spaces nor appear in a group.
    It should be followed by a hole with dot-separated words.
    Detection only happens if no matches exist at the beginning.
    """
    consequence = [RemoveMatch, AppendMatch]

    def __init__(self, value_formatter):
        """Default constructor."""
        super(DashSeparatedReleaseGroup, self).__init__()
        self.value_formatter = value_formatter

    @classmethod
    def is_valid(cls, matches, candidate, start, end, at_end):  # pylint:disable=inconsistent-return-statements
        """
        Whether a candidate is a valid release group.
        """
        if not at_end:
            if len(candidate.value) <= 1:
                return False

            if matches.markers.at_match(candidate, predicate=lambda m: m.name == 'group'):
                return False

            first_hole = matches.holes(candidate.end, end, predicate=lambda m: m.start == candidate.end, index=0)
            if not first_hole:
                return False

            raw_value = first_hole.raw
            return raw_value[0] == '-' and '-' not in raw_value[1:] and '.' in raw_value and ' ' not in raw_value

        group = matches.markers.at_match(candidate, predicate=lambda m: m.name == 'group', index=0)
        if group and matches.at_match(group, predicate=lambda m: not m.private and m.span != candidate.span):
            return False

        count = 0
        match = candidate
        while match:
            current = matches.range(start,
                                    match.start,
                                    index=-1,
                                    predicate=lambda m: not m.private and 'expected' not in m.tags)
            if not current:
                break

            separator = match.input_string[current.end:match.start]
            if not separator and match.raw[0] == '-':
                separator = '-'

            match = current

            if count == 0:
                if separator != '-':
                    break

                count += 1
                continue

            if separator == '.':
                return True

    def detect(self, matches, start, end, at_end):  # pylint:disable=inconsistent-return-statements
        """
        Detect release group at the end or at the beginning of a filepart.
        """
        candidate = None
        if at_end:
            container = matches.ending(end, lambda m: m.name == 'container', index=0)
            if container:
                end = container.start

            candidate = matches.ending(end, index=0, predicate=(
                lambda m: not m.private and not (
                    m.name == 'other' and 'not-a-release-group' in m.tags
                ) and '-' not in m.raw and m.raw.strip() == m.raw))

        if not candidate:
            if at_end:
                candidate = matches.holes(start, end, seps=seps, index=-1,
                                          predicate=lambda m: m.end == end and m.raw.strip(seps) and m.raw[0] == '-')
            else:
                candidate = matches.holes(start, end, seps=seps, index=0,
                                          predicate=lambda m: m.start == start and m.raw.strip(seps))

        if candidate and self.is_valid(matches, candidate, start, end, at_end):
            return candidate

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        if matches.named('release_group'):
            return

        to_remove = []
        to_append = []
        for filepart in matches.markers.named('path'):
            candidate = self.detect(matches, filepart.start, filepart.end, True)
            if candidate:
                to_remove.extend(matches.at_match(candidate))
            else:
                candidate = self.detect(matches, filepart.start, filepart.end, False)

            if candidate:
                releasegroup = Match(candidate.start, candidate.end, name='release_group',
                                     formatter=self.value_formatter, input_string=candidate.input_string)

                if releasegroup.value:
                    to_append.append(releasegroup)
        return to_remove, to_append


class SceneReleaseGroup(Rule):
    """
    Add release_group match in existing matches (scene format).

    Something.XViD-ReleaseGroup.mkv
    """
    dependency = [TitleFromPosition]
    consequence = AppendMatch

    properties = {'release_group': [None]}

    def __init__(self, value_formatter):
        """Default constructor."""
        super(SceneReleaseGroup, self).__init__()
        self.value_formatter = value_formatter

    def when(self, matches, context):  # pylint:disable=too-many-locals
        # If a release_group is found before, ignore this kind of release_group rule.

        ret = []

        for filepart in marker_sorted(matches.markers.named('path'), matches):
            # pylint:disable=cell-var-from-loop
            start, end = filepart.span
            if matches.named('release_group', predicate=lambda m: m.start >= start and m.end <= end):
                continue

            titles = matches.named('title', predicate=lambda m: m.start >= start and m.end <= end)

            def keep_only_first_title(match):
                """
                Keep only first title from this filepart, as other ones are most likely release group.

                :param match:
                :type match:
                :return:
                :rtype:
                """
                return match in titles[1:]

            last_hole = matches.holes(start, end + 1, formatter=self.value_formatter,
                                      ignore=keep_only_first_title,
                                      predicate=lambda hole: cleanup(hole.value), index=-1)

            if last_hole:
                def previous_match_filter(match):
                    """
                    Filter to apply to find previous match

                    :param match:
                    :type match:
                    :return:
                    :rtype:
                    """

                    if match.start < filepart.start:
                        return False
                    return not match.private or match.name in _scene_previous_names

                previous_match = matches.previous(last_hole,
                                                  previous_match_filter,
                                                  index=0)
                if previous_match and (previous_match.name in _scene_previous_names or
                                       any(tag in previous_match.tags for tag in _scene_previous_tags)) and \
                        not matches.input_string[previous_match.end:last_hole.start].strip(seps) \
                        and not int_coercable(last_hole.value.strip(seps)):

                    last_hole.name = 'release_group'
                    last_hole.tags = ['scene']

                    # if hole is inside a group marker with same value, remove [](){} ...
                    group = matches.markers.at_match(last_hole, lambda marker: marker.name == 'group', 0)
                    if group:
                        group.formatter = self.value_formatter
                        if group.value == last_hole.value:
                            last_hole.start = group.start + 1
                            last_hole.end = group.end - 1
                            last_hole.tags = ['anime']

                    ignored_matches = matches.range(last_hole.start, last_hole.end, keep_only_first_title)

                    for ignored_match in ignored_matches:
                        matches.remove(ignored_match)

                    ret.append(last_hole)
        return ret


class AnimeReleaseGroup(Rule):
    """
    Add release_group match in existing matches (anime format)
    ...[ReleaseGroup] Something.mkv
    """
    dependency = [SceneReleaseGroup, TitleFromPosition]
    consequence = [RemoveMatch, AppendMatch]

    properties = {'release_group': [None]}

    def when(self, matches, context):
        to_remove = []
        to_append = []

        # If a release_group is found before, ignore this kind of release_group rule.
        if matches.named('release_group'):
            return to_remove, to_append

        if not matches.named('episode') and not matches.named('season') and matches.named('release_group'):
            # This doesn't seem to be an anime, and we already found another release_group.
            return to_remove, to_append

        for filepart in marker_sorted(matches.markers.named('path'), matches):

            # pylint:disable=bad-continuation
            empty_group = matches.markers.range(filepart.start,
                                                filepart.end,
                                                lambda marker: (marker.name == 'group'
                                                                and not matches.range(marker.start, marker.end,
                                                                                      lambda m:
                                                                                      'weak-language' not in m.tags)
                                                                and marker.value.strip(seps)
                                                                and not int_coercable(marker.value.strip(seps))), 0)

            if empty_group:
                group = copy.copy(empty_group)
                group.marker = False
                group.raw_start += 1
                group.raw_end -= 1
                group.tags = ['anime']
                group.name = 'release_group'
                to_append.append(group)
                to_remove.extend(matches.range(empty_group.start, empty_group.end,
                                               lambda m: 'weak-language' in m.tags))
        return to_remove, to_append
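# Illustrative sketch (not part of the upstream guessit source). The docstring
# examples of DashSeparatedReleaseGroup above translate directly to the public
# API:
#
#     from guessit import guessit
#     guessit('Series.S01E02.Pilot.DVDRip.x264-CS.mkv').get('release_group')
#     # -> 'CS' (dash-separated group at the end of the release name)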
163
libs/common/guessit/rules/properties/screen_size.py
Normal file
@ -0,0 +1,163 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
screen_size property
"""
from rebulk.match import Match
from rebulk.remodule import re

from rebulk import Rebulk, Rule, RemoveMatch, AppendMatch

from ..common.pattern import is_disabled
from ..common.quantity import FrameRate
from ..common.validators import seps_surround
from ..common import dash, seps
from ...reutils import build_or_pattern


def screen_size(config):
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    interlaced = frozenset({res for res in config['interlaced']})
    progressive = frozenset({res for res in config['progressive']})
    frame_rates = [re.escape(rate) for rate in config['frame_rates']]
    min_ar = config['min_ar']
    max_ar = config['max_ar']

    rebulk = Rebulk()
    rebulk = rebulk.string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE)

    rebulk.defaults(name='screen_size', validator=seps_surround, abbreviations=[dash],
                    disabled=lambda context: is_disabled(context, 'screen_size'))

    frame_rate_pattern = build_or_pattern(frame_rates, name='frame_rate')
    interlaced_pattern = build_or_pattern(interlaced, name='height')
    progressive_pattern = build_or_pattern(progressive, name='height')

    res_pattern = r'(?:(?P<width>\d{3,4})(?:x|\*))?'
    rebulk.regex(res_pattern + interlaced_pattern + r'(?P<scan_type>i)' + frame_rate_pattern + '?')
    rebulk.regex(res_pattern + progressive_pattern + r'(?P<scan_type>p)' + frame_rate_pattern + '?')
    rebulk.regex(res_pattern + progressive_pattern + r'(?P<scan_type>p)?(?:hd)')
    rebulk.regex(res_pattern + progressive_pattern + r'(?P<scan_type>p)?x?')
    rebulk.string('4k', value='2160p')
    rebulk.regex(r'(?P<width>\d{3,4})-?(?:x|\*)-?(?P<height>\d{3,4})',
                 conflict_solver=lambda match, other: '__default__' if other.name == 'screen_size' else other)

    rebulk.regex(frame_rate_pattern + '(p|fps)', name='frame_rate',
                 formatter=FrameRate.fromstring, disabled=lambda context: is_disabled(context, 'frame_rate'))

    rebulk.rules(PostProcessScreenSize(progressive, min_ar, max_ar), ScreenSizeOnlyOne, ResolveScreenSizeConflicts)

    return rebulk


class PostProcessScreenSize(Rule):
    """
    Process the screen size calculating the aspect ratio if available.

    Convert to a standard notation (720p, 1080p, etc) when it's a standard resolution and
    aspect ratio is valid or not available.

    It also creates an aspect_ratio match when available.
    """
    consequence = AppendMatch

    def __init__(self, standard_heights, min_ar, max_ar):
        super(PostProcessScreenSize, self).__init__()
        self.standard_heights = standard_heights
        self.min_ar = min_ar
        self.max_ar = max_ar

    def when(self, matches, context):
        to_append = []
        for match in matches.named('screen_size'):
            if not is_disabled(context, 'frame_rate'):
                for frame_rate in match.children.named('frame_rate'):
                    frame_rate.formatter = FrameRate.fromstring
                    to_append.append(frame_rate)

            values = match.children.to_dict()
            if 'height' not in values:
                continue

            scan_type = (values.get('scan_type') or 'p').lower()
            height = values['height']
            if 'width' not in values:
                match.value = '{0}{1}'.format(height, scan_type)
                continue

            width = values['width']
            calculated_ar = float(width) / float(height)

            aspect_ratio = Match(match.start, match.end, input_string=match.input_string,
                                 name='aspect_ratio', value=round(calculated_ar, 3))

            if not is_disabled(context, 'aspect_ratio'):
                to_append.append(aspect_ratio)

            if height in self.standard_heights and self.min_ar < calculated_ar < self.max_ar:
                match.value = '{0}{1}'.format(height, scan_type)
            else:
                match.value = '{0}x{1}'.format(width, height)

        return to_append
|
||||
|
||||
|
||||
class ScreenSizeOnlyOne(Rule):
|
||||
"""
|
||||
Keep a single screen_size per filepath part.
|
||||
"""
|
||||
consequence = RemoveMatch
|
||||
|
||||
def when(self, matches, context):
|
||||
to_remove = []
|
||||
for filepart in matches.markers.named('path'):
|
||||
screensize = list(reversed(matches.range(filepart.start, filepart.end,
|
||||
lambda match: match.name == 'screen_size')))
|
||||
if len(screensize) > 1 and len(set((match.value for match in screensize))) > 1:
|
||||
to_remove.extend(screensize[1:])
|
||||
|
||||
return to_remove
|
||||
|
||||
|
||||
class ResolveScreenSizeConflicts(Rule):
|
||||
"""
|
||||
Resolve screen_size conflicts with season and episode matches.
|
||||
"""
|
||||
consequence = RemoveMatch
|
||||
|
||||
def when(self, matches, context):
|
||||
to_remove = []
|
||||
for filepart in matches.markers.named('path'):
|
||||
screensize = matches.range(filepart.start, filepart.end, lambda match: match.name == 'screen_size', 0)
|
||||
if not screensize:
|
||||
continue
|
||||
|
||||
conflicts = matches.conflicting(screensize, lambda match: match.name in ('season', 'episode'))
|
||||
if not conflicts:
|
||||
continue
|
||||
|
||||
has_neighbor = False
|
||||
video_profile = matches.range(screensize.end, filepart.end, lambda match: match.name == 'video_profile', 0)
|
||||
if video_profile and not matches.holes(screensize.end, video_profile.start,
|
||||
predicate=lambda h: h.value and h.value.strip(seps)):
|
||||
to_remove.extend(conflicts)
|
||||
has_neighbor = True
|
||||
|
||||
previous = matches.previous(screensize, index=0, predicate=(
|
||||
lambda m: m.name in ('date', 'source', 'other', 'streaming_service')))
|
||||
if previous and not matches.holes(previous.end, screensize.start,
|
||||
predicate=lambda h: h.value and h.value.strip(seps)):
|
||||
to_remove.extend(conflicts)
|
||||
has_neighbor = True
|
||||
|
||||
if not has_neighbor:
|
||||
to_remove.append(screensize)
|
||||
|
||||
return to_remove
|
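A quick sketch of how the screen_size rules above behave through the public API, assuming the default bundled configuration (which lists 1080 among the progressive heights); the file names are invented:

from guessit import guessit

print(guessit('Some.Movie.720p.mkv')['screen_size'])  # '720p'
result = guessit('Some.Movie.1920x1080.mkv')
print(result['screen_size'])   # normalized to '1080p' (standard height, valid aspect ratio)
print(result['aspect_ratio'])  # 1.778, appended by PostProcessScreenSize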
30
libs/common/guessit/rules/properties/size.py
Normal file
@ -0,0 +1,30 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
size property
"""
import re

from rebulk import Rebulk

from ..common import dash
from ..common.quantity import Size
from ..common.pattern import is_disabled
from ..common.validators import seps_surround


def size(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'size'))
    rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='size', validator=seps_surround)
    rebulk.regex(r'\d+-?[mgt]b', r'\d+\.\d+-?[mgt]b', formatter=Size.fromstring, tags=['release-group-prefix'])

    return rebulk
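A short sketch of the size property in use; the file name is invented, and the value is assumed to come back as a Size quantity whose string form keeps the original unit:

from guessit import guessit

print(guessit('Some.Movie.DVDRip.700MB.avi')['size'])  # expected: 700MB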
201
libs/common/guessit/rules/properties/source.py
Normal file
@ -0,0 +1,201 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
source property
"""
import copy

from rebulk.remodule import re

from rebulk import AppendMatch, Rebulk, RemoveMatch, Rule

from .audio_codec import HqConflictRule
from ..common import dash, seps
from ..common.pattern import is_disabled
from ..common.validators import seps_before, seps_after


def source(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'source'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash], private_parent=True, children=True)
    rebulk.defaults(name='source', tags=['video-codec-prefix', 'streaming_service.suffix'])

    rip_prefix = '(?P<other>Rip)-?'
    rip_suffix = '-?(?P<other>Rip)'
    rip_optional_suffix = '(?:' + rip_suffix + ')?'

    def build_source_pattern(*patterns, **kwargs):
        """Helper pattern to build source pattern."""
        prefix_format = kwargs.get('prefix') or ''
        suffix_format = kwargs.get('suffix') or ''

        string_format = prefix_format + '({0})' + suffix_format
        return [string_format.format(pattern) for pattern in patterns]

    def demote_other(match, other):  # pylint: disable=unused-argument
        """Default conflict solver with 'other' property."""
        return other if other.name == 'other' else '__default__'

    rebulk.regex(*build_source_pattern('VHS', suffix=rip_optional_suffix),
                 value={'source': 'VHS', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('CAM', suffix=rip_optional_suffix),
                 value={'source': 'Camera', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('HD-?CAM', suffix=rip_optional_suffix),
                 value={'source': 'HD Camera', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TELESYNC', 'TS', suffix=rip_optional_suffix),
                 value={'source': 'Telesync', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('HD-?TELESYNC', 'HD-?TS', suffix=rip_optional_suffix),
                 value={'source': 'HD Telesync', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('WORKPRINT', 'WP'), value='Workprint')
    rebulk.regex(*build_source_pattern('TELECINE', 'TC', suffix=rip_optional_suffix),
                 value={'source': 'Telecine', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('HD-?TELECINE', 'HD-?TC', suffix=rip_optional_suffix),
                 value={'source': 'HD Telecine', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('PPV', suffix=rip_optional_suffix),
                 value={'source': 'Pay-per-view', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('SD-?TV', suffix=rip_optional_suffix),
                 value={'source': 'TV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV', suffix=rip_suffix),  # TV is too common to allow matching
                 value={'source': 'TV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV', 'SD-?TV', prefix=rip_prefix),
                 value={'source': 'TV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV-?(?=Dub)'), value='TV')
    rebulk.regex(*build_source_pattern('DVB', 'PD-?TV', suffix=rip_optional_suffix),
                 value={'source': 'Digital TV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('DVD', suffix=rip_optional_suffix),
                 value={'source': 'DVD', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('DM', suffix=rip_optional_suffix),
                 value={'source': 'Digital Master', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('VIDEO-?TS', 'DVD-?R(?:$|(?!E))',  # 'DVD-?R(?:$|^E)' => DVD-Real ...
                                       'DVD-?9', 'DVD-?5'), value='DVD')

    rebulk.regex(*build_source_pattern('HD-?TV', suffix=rip_optional_suffix), conflict_solver=demote_other,
                 value={'source': 'HDTV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV-?HD', suffix=rip_suffix), conflict_solver=demote_other,
                 value={'source': 'HDTV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV', suffix='-?(?P<other>Rip-?HD)'), conflict_solver=demote_other,
                 value={'source': 'HDTV', 'other': 'Rip'})

    rebulk.regex(*build_source_pattern('VOD', suffix=rip_optional_suffix),
                 value={'source': 'Video on Demand', 'other': 'Rip'})

    rebulk.regex(*build_source_pattern('WEB', 'WEB-?DL', suffix=rip_suffix),
                 value={'source': 'Web', 'other': 'Rip'})
    # WEBCap is a synonym to WEBRip, mostly used by non english
    rebulk.regex(*build_source_pattern('WEB-?(?P<another>Cap)', suffix=rip_optional_suffix),
                 value={'source': 'Web', 'other': 'Rip', 'another': 'Rip'})
    rebulk.regex(*build_source_pattern('WEB-?DL', 'WEB-?U?HD', 'WEB', 'DL-?WEB', 'DL(?=-?Mux)'),
                 value={'source': 'Web'})

    rebulk.regex(*build_source_pattern('HD-?DVD', suffix=rip_optional_suffix),
                 value={'source': 'HD-DVD', 'other': 'Rip'})

    rebulk.regex(*build_source_pattern('Blu-?ray', 'BD', 'BD[59]', 'BD25', 'BD50', suffix=rip_optional_suffix),
                 value={'source': 'Blu-ray', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('(?P<another>BR)-?(?=Scr(?:eener)?)', '(?P<another>BR)-?(?=Mux)'),  # BRRip
                 value={'source': 'Blu-ray', 'another': 'Reencoded'})
    rebulk.regex(*build_source_pattern('(?P<another>BR)', suffix=rip_suffix),  # BRRip
                 value={'source': 'Blu-ray', 'other': 'Rip', 'another': 'Reencoded'})

    rebulk.regex(*build_source_pattern('Ultra-?Blu-?ray', 'Blu-?ray-?Ultra'), value='Ultra HD Blu-ray')

    rebulk.regex(*build_source_pattern('AHDTV'), value='Analog HDTV')
    rebulk.regex(*build_source_pattern('UHD-?TV', suffix=rip_optional_suffix), conflict_solver=demote_other,
                 value={'source': 'Ultra HDTV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('UHD', suffix=rip_suffix), conflict_solver=demote_other,
                 value={'source': 'Ultra HDTV', 'other': 'Rip'})

    rebulk.regex(*build_source_pattern('DSR', 'DTH', suffix=rip_optional_suffix),
                 value={'source': 'Satellite', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('DSR?', 'SAT', suffix=rip_suffix),
                 value={'source': 'Satellite', 'other': 'Rip'})

    rebulk.rules(ValidateSource, UltraHdBlurayRule)

    return rebulk


class UltraHdBlurayRule(Rule):
    """
    Replace other:Ultra HD and source:Blu-ray with source:Ultra HD Blu-ray
    """
    dependency = HqConflictRule
    consequence = [RemoveMatch, AppendMatch]

    @classmethod
    def find_ultrahd(cls, matches, start, end, index):
        """Find Ultra HD match."""
        return matches.range(start, end, index=index, predicate=(
            lambda m: not m.private and m.name == 'other' and m.value == 'Ultra HD'
        ))

    @classmethod
    def validate_range(cls, matches, start, end):
        """Validate no holes or invalid matches exist in the specified range."""
        return (
            not matches.holes(start, end, predicate=lambda m: m.value.strip(seps)) and
            not matches.range(start, end, predicate=(
                lambda m: not m.private and (
                    m.name not in ('screen_size', 'color_depth') and (
                        m.name != 'other' or 'uhdbluray-neighbor' not in m.tags))))
        )

    def when(self, matches, context):
        to_remove = []
        to_append = []
        for filepart in matches.markers.named('path'):
            for match in matches.range(filepart.start, filepart.end, predicate=(
                    lambda m: not m.private and m.name == 'source' and m.value == 'Blu-ray')):
                other = self.find_ultrahd(matches, filepart.start, match.start, -1)
                if not other or not self.validate_range(matches, other.end, match.start):
                    other = self.find_ultrahd(matches, match.end, filepart.end, 0)
                    if not other or not self.validate_range(matches, match.end, other.start):
                        if not matches.range(filepart.start, filepart.end, predicate=(
                                lambda m: m.name == 'screen_size' and m.value == '2160p')):
                            continue

                if other:
                    other.private = True

                new_source = copy.copy(match)
                new_source.value = 'Ultra HD Blu-ray'
                to_remove.append(match)
                to_append.append(new_source)

        return to_remove, to_append


class ValidateSource(Rule):
    """
    Validate source with screener property, with video_codec property or separated
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        ret = []
        for match in matches.named('source'):
            match = match.initiator
            if not seps_before(match) and \
                    not matches.range(match.start - 1, match.start - 2,
                                      lambda m: 'source-prefix' in m.tags):
                if match.children:
                    ret.extend(match.children)
                ret.append(match)
                continue
            if not seps_after(match) and \
                    not matches.range(match.end, match.end + 1,
                                      lambda m: 'source-suffix' in m.tags):
                if match.children:
                    ret.extend(match.children)
                ret.append(match)
                continue
        return ret
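A sketch of the source patterns above through the public API; the release names are invented:

from guessit import guessit

print(guessit('Some.Show.S01E01.WEB-DL.x264-GRP')['source'])  # 'Web'
result = guessit('Some.Movie.2012.BDRip.x264')
print(result['source'], result['other'])  # 'Blu-ray' with a 'Rip' other match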
198
libs/common/guessit/rules/properties/streaming_service.py
Normal file
@ -0,0 +1,198 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
streaming_service property
"""
import re

from rebulk import Rebulk
from rebulk.rules import Rule, RemoveMatch

from ..common.pattern import is_disabled
from ...rules.common import seps, dash
from ...rules.common.validators import seps_before, seps_after


def streaming_service(config):  # pylint: disable=too-many-statements,unused-argument
    """Streaming service property.

    :param config: rule configuration
    :type config: dict
    :return:
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'streaming_service'))
    rebulk = rebulk.string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='streaming_service', tags=['source-prefix'])

    rebulk.string('AE', 'A&E', value='A&E')
    rebulk.string('AMBC', value='ABC')
    rebulk.string('AUBC', value='ABC Australia')
    rebulk.string('AJAZ', value='Al Jazeera English')
    rebulk.string('AMC', value='AMC')
    rebulk.string('AMZN', 'Amazon', value='Amazon Prime')
    rebulk.regex('Amazon-?Prime', value='Amazon Prime')
    rebulk.string('AS', value='Adult Swim')
    rebulk.regex('Adult-?Swim', value='Adult Swim')
    rebulk.string('ATK', value="America's Test Kitchen")
    rebulk.string('ANPL', value='Animal Planet')
    rebulk.string('ANLB', value='AnimeLab')
    rebulk.string('AOL', value='AOL')
    rebulk.string('ARD', value='ARD')
    rebulk.string('iP', value='BBC iPlayer')
    rebulk.regex('BBC-?iPlayer', value='BBC iPlayer')
    rebulk.string('BRAV', value='BravoTV')
    rebulk.string('CNLP', value='Canal+')
    rebulk.string('CN', value='Cartoon Network')
    rebulk.string('CBC', value='CBC')
    rebulk.string('CBS', value='CBS')
    rebulk.string('CNBC', value='CNBC')
    rebulk.string('CC', value='Comedy Central')
    rebulk.string('4OD', value='Channel 4')
    rebulk.string('CHGD', value='CHRGD')
    rebulk.string('CMAX', value='Cinemax')
    rebulk.string('CMT', value='Country Music Television')
    rebulk.regex('Comedy-?Central', value='Comedy Central')
    rebulk.string('CCGC', value='Comedians in Cars Getting Coffee')
    rebulk.string('CR', value='Crunchy Roll')
    rebulk.string('CRKL', value='Crackle')
    rebulk.regex('Crunchy-?Roll', value='Crunchy Roll')
    rebulk.string('CSPN', value='CSpan')
    rebulk.string('CTV', value='CTV')
    rebulk.string('CUR', value='CuriosityStream')
    rebulk.string('CWS', value='CWSeed')
    rebulk.string('DSKI', value='Daisuki')
    rebulk.string('DHF', value='Deadhouse Films')
    rebulk.string('DDY', value='Digiturk Diledigin Yerde')
    rebulk.string('DISC', 'Discovery', value='Discovery')
    rebulk.string('DSNY', 'Disney', value='Disney')
    rebulk.string('DIY', value='DIY Network')
    rebulk.string('DOCC', value='Doc Club')
    rebulk.string('DPLY', value='DPlay')
    rebulk.string('ETV', value='E!')
    rebulk.string('EPIX', value='ePix')
    rebulk.string('ETTV', value='El Trece')
    rebulk.string('ESPN', value='ESPN')
    rebulk.string('ESQ', value='Esquire')
    rebulk.string('FAM', value='Family')
    rebulk.string('FJR', value='Family Jr')
    rebulk.string('FOOD', value='Food Network')
    rebulk.string('FOX', value='Fox')
    rebulk.string('FREE', value='Freeform')
    rebulk.string('FYI', value='FYI Network')
    rebulk.string('GLBL', value='Global')
    rebulk.string('GLOB', value='GloboSat Play')
    rebulk.string('HLMK', value='Hallmark')
    rebulk.string('HBO', value='HBO Go')
    rebulk.regex('HBO-?Go', value='HBO Go')
    rebulk.string('HGTV', value='HGTV')
    rebulk.string('HIST', 'History', value='History')
    rebulk.string('HULU', value='Hulu')
    rebulk.string('ID', value='Investigation Discovery')
    rebulk.string('IFC', value='IFC')
    rebulk.string('iTunes', 'iT', value='iTunes')
    rebulk.string('ITV', value='ITV')
    rebulk.string('KNOW', value='Knowledge Network')
    rebulk.string('LIFE', value='Lifetime')
    rebulk.string('MTOD', value='Motor Trend OnDemand')
    rebulk.string('MNBC', value='MSNBC')
    rebulk.string('MTV', value='MTV')
    rebulk.string('NATG', value='National Geographic')
    rebulk.regex('National-?Geographic', value='National Geographic')
    rebulk.string('NBA', value='NBA TV')
    rebulk.regex('NBA-?TV', value='NBA TV')
    rebulk.string('NBC', value='NBC')
    rebulk.string('NF', 'Netflix', value='Netflix')
    rebulk.string('NFL', value='NFL')
    rebulk.string('NFLN', value='NFL Now')
    rebulk.string('GC', value='NHL GameCenter')
    rebulk.string('NICK', 'Nickelodeon', value='Nickelodeon')
    rebulk.string('NRK', value='Norsk Rikskringkasting')
    rebulk.string('PBS', value='PBS')
    rebulk.string('PBSK', value='PBS Kids')
    rebulk.string('PSN', value='Playstation Network')
    rebulk.string('PLUZ', value='Pluzz')
    rebulk.string('RTE', value='RTE One')
    rebulk.string('SBS', value='SBS (AU)')
    rebulk.string('SESO', 'SeeSo', value='SeeSo')
    rebulk.string('SHMI', value='Shomi')
    rebulk.string('SPIK', value='Spike')
    rebulk.string('SPKE', value='Spike TV')
    rebulk.regex('Spike-?TV', value='Spike TV')
    rebulk.string('SNET', value='Sportsnet')
    rebulk.string('SPRT', value='Sprout')
    rebulk.string('STAN', value='Stan')
    rebulk.string('STZ', value='Starz')
    rebulk.string('SVT', value='Sveriges Television')
    rebulk.string('SWER', value='SwearNet')
    rebulk.string('SYFY', value='Syfy')
    rebulk.string('TBS', value='TBS')
    rebulk.string('TFOU', value='TFou')
    rebulk.string('CW', value='The CW')
    rebulk.regex('The-?CW', value='The CW')
    rebulk.string('TLC', value='TLC')
    rebulk.string('TUBI', value='TubiTV')
    rebulk.string('TV3', value='TV3 Ireland')
    rebulk.string('TV4', value='TV4 Sweeden')
    rebulk.string('TVL', value='TV Land')
    rebulk.regex('TV-?Land', value='TV Land')
    rebulk.string('UFC', value='UFC')
    rebulk.string('UKTV', value='UKTV')
    rebulk.string('UNIV', value='Univision')
    rebulk.string('USAN', value='USA Network')
    rebulk.string('VLCT', value='Velocity')
    rebulk.string('VH1', value='VH1')
    rebulk.string('VICE', value='Viceland')
    rebulk.string('VMEO', value='Vimeo')
    rebulk.string('VRV', value='VRV')
    rebulk.string('WNET', value='W Network')
    rebulk.string('WME', value='WatchMe')
    rebulk.string('WWEN', value='WWE Network')
    rebulk.string('XBOX', value='Xbox Video')
    rebulk.string('YHOO', value='Yahoo')
    rebulk.string('RED', value='YouTube Red')
    rebulk.string('ZDF', value='ZDF')

    rebulk.rules(ValidateStreamingService)

    return rebulk


class ValidateStreamingService(Rule):
    """Validate streaming service matches."""

    priority = 32
    consequence = RemoveMatch

    def when(self, matches, context):
        """Streaming service is always before source.

        :param matches:
        :type matches: rebulk.match.Matches
        :param context:
        :type context: dict
        :return:
        """
        to_remove = []
        for service in matches.named('streaming_service'):
            next_match = matches.next(service, lambda match: 'streaming_service.suffix' in match.tags, 0)
            previous_match = matches.previous(service, lambda match: 'streaming_service.prefix' in match.tags, 0)
            has_other = service.initiator and service.initiator.children.named('other')

            if not has_other:
                if (not next_match or
                        matches.holes(service.end, next_match.start,
                                      predicate=lambda match: match.value.strip(seps)) or
                        not seps_before(service)):
                    if (not previous_match or
                            matches.holes(previous_match.end, service.start,
                                          predicate=lambda match: match.value.strip(seps)) or
                            not seps_after(service)):
                        to_remove.append(service)
                        continue

            if service.value == 'Comedy Central':
                # Current match is a valid streaming service, removing invalid Criterion Collection (CC) matches
                to_remove.extend(matches.named('edition', predicate=lambda match: match.value == 'Criterion'))

        return to_remove
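A sketch showing a streaming service abbreviation being resolved; it relies on the tag setup above, where the service directly precedes a source match, and the release name is invented:

from guessit import guessit

print(guessit('Some.Show.S01E01.AMZN.WEB-DL.x264-GRP')['streaming_service'])  # 'Amazon Prime'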
332
libs/common/guessit/rules/properties/title.py
Normal file
@ -0,0 +1,332 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
title property
"""

from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, AppendTags
from rebulk.formatters import formatters

from .film import FilmTitleRule
from .language import SubtitlePrefixLanguageRule, SubtitleSuffixLanguageRule, SubtitleExtensionRule
from ..common import seps, title_seps
from ..common.comparators import marker_sorted
from ..common.expected import build_expected_function
from ..common.formatters import cleanup, reorder_title
from ..common.pattern import is_disabled
from ..common.validators import seps_surround


def title(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'title'))
    rebulk.rules(TitleFromPosition, PreferTitleWithYear)

    expected_title = build_expected_function('expected_title')

    rebulk.functional(expected_title, name='title', tags=['expected', 'title'],
                      validator=seps_surround,
                      formatter=formatters(cleanup, reorder_title),
                      conflict_solver=lambda match, other: other,
                      disabled=lambda context: not context.get('expected_title'))

    return rebulk


class TitleBaseRule(Rule):
    """
    Add title match in existing matches
    """
    # pylint:disable=no-self-use,unused-argument
    consequence = [AppendMatch, RemoveMatch]

    def __init__(self, match_name, match_tags=None, alternative_match_name=None):
        super(TitleBaseRule, self).__init__()
        self.match_name = match_name
        self.match_tags = match_tags
        self.alternative_match_name = alternative_match_name

    def hole_filter(self, hole, matches):
        """
        Filter holes for titles.
        :param hole:
        :type hole:
        :param matches:
        :type matches:
        :return:
        :rtype:
        """
        return True

    def filepart_filter(self, filepart, matches):
        """
        Filter filepart for titles.
        :param filepart:
        :type filepart:
        :param matches:
        :type matches:
        :return:
        :rtype:
        """
        return True

    def holes_process(self, holes, matches):
        """
        Process holes.
        :param holes:
        :type holes:
        :param matches:
        :type matches:
        :return:
        :rtype:
        """
        cropped_holes = []
        for hole in holes:
            group_markers = matches.markers.named('group')
            cropped_holes.extend(hole.crop(group_markers))
        return cropped_holes

    def is_ignored(self, match):
        """
        Ignore matches when scanning for title (hole).

        Full word language and countries won't be ignored if they are uppercase.
        """
        return not (len(match) > 3 and match.raw.isupper()) and match.name in ('language', 'country', 'episode_details')

    def should_keep(self, match, to_keep, matches, filepart, hole, starting):
        """
        Check if this match should be accepted when ending or starting a hole.
        :param match:
        :type match:
        :param to_keep:
        :type to_keep: list[Match]
        :param matches:
        :type matches: Matches
        :param filepart: the filepart match
        :type filepart: Match
        :param hole: the hole match
        :type hole: Match
        :param starting: true if match is starting the hole
        :type starting: bool
        :return:
        :rtype:
        """
        if match.name in ('language', 'country'):
            # Keep language if exactly matching the hole.
            if len(hole.value) == len(match.raw):
                return True

            # Keep language if other languages exist in the filepart.
            outside_matches = filepart.crop(hole)
            other_languages = []
            for outside in outside_matches:
                other_languages.extend(matches.range(outside.start, outside.end,
                                                     lambda c_match: c_match.name == match.name and
                                                     c_match not in to_keep))

            if not other_languages and (not starting or len(match.raw) <= 3):
                return True

        return False

    def should_remove(self, match, matches, filepart, hole, context):
        """
        Check if this match should be removed after being ignored.
        :param match:
        :param matches:
        :param filepart:
        :param hole:
        :return:
        """
        if context.get('type') == 'episode' and match.name == 'episode_details':
            return match.start >= hole.start and match.end <= hole.end
        return True

    def check_titles_in_filepart(self, filepart, matches, context):  # pylint:disable=inconsistent-return-statements
        """
        Find title in filepart (ignoring language)
        """
        # pylint:disable=too-many-locals,too-many-branches,too-many-statements
        start, end = filepart.span

        holes = matches.holes(start, end + 1, formatter=formatters(cleanup, reorder_title),
                              ignore=self.is_ignored,
                              predicate=lambda m: m.value)

        holes = self.holes_process(holes, matches)

        for hole in holes:
            if not hole or (self.hole_filter and not self.hole_filter(hole, matches)):
                continue

            to_remove = []
            to_keep = []

            ignored_matches = matches.range(hole.start, hole.end, self.is_ignored)

            if ignored_matches:
                for ignored_match in reversed(ignored_matches):
                    # pylint:disable=undefined-loop-variable, cell-var-from-loop
                    trailing = matches.chain_before(hole.end, seps, predicate=lambda m: m == ignored_match)
                    if trailing:
                        should_keep = self.should_keep(ignored_match, to_keep, matches, filepart, hole, False)
                        if should_keep:
                            # pylint:disable=unpacking-non-sequence
                            try:
                                append, crop = should_keep
                            except TypeError:
                                append, crop = should_keep, should_keep
                            if append:
                                to_keep.append(ignored_match)
                            if crop:
                                hole.end = ignored_match.start

                for ignored_match in ignored_matches:
                    if ignored_match not in to_keep:
                        starting = matches.chain_after(hole.start, seps,
                                                       predicate=lambda m: m == ignored_match)
                        if starting:
                            should_keep = self.should_keep(ignored_match, to_keep, matches, filepart, hole, True)
                            if should_keep:
                                # pylint:disable=unpacking-non-sequence
                                try:
                                    append, crop = should_keep
                                except TypeError:
                                    append, crop = should_keep, should_keep
                                if append:
                                    to_keep.append(ignored_match)
                                if crop:
                                    hole.start = ignored_match.end

            for match in ignored_matches:
                if self.should_remove(match, matches, filepart, hole, context):
                    to_remove.append(match)
            for keep_match in to_keep:
                if keep_match in to_remove:
                    to_remove.remove(keep_match)

            if hole and hole.value:
                hole.name = self.match_name
                hole.tags = self.match_tags
                if self.alternative_match_name:
                    # Split and keep values that can be a title
                    titles = hole.split(title_seps, lambda m: m.value)
                    for title_match in list(titles[1:]):
                        previous_title = titles[titles.index(title_match) - 1]
                        separator = matches.input_string[previous_title.end:title_match.start]
                        if len(separator) == 1 and separator == '-' \
                                and previous_title.raw[-1] not in seps \
                                and title_match.raw[0] not in seps:
                            titles[titles.index(title_match) - 1].end = title_match.end
                            titles.remove(title_match)
                        else:
                            title_match.name = self.alternative_match_name
                else:
                    titles = [hole]
                return titles, to_remove

    def when(self, matches, context):
        ret = []
        to_remove = []

        if matches.named(self.match_name, lambda match: 'expected' in match.tags):
            return ret, to_remove

        fileparts = [filepart for filepart in list(marker_sorted(matches.markers.named('path'), matches))
                     if not self.filepart_filter or self.filepart_filter(filepart, matches)]

        # Prioritize fileparts containing the year
        years_fileparts = []
        for filepart in fileparts:
            year_match = matches.range(filepart.start, filepart.end, lambda match: match.name == 'year', 0)
            if year_match:
                years_fileparts.append(filepart)

        for filepart in fileparts:
            try:
                years_fileparts.remove(filepart)
            except ValueError:
                pass
            titles = self.check_titles_in_filepart(filepart, matches, context)
            if titles:
                titles, to_remove_c = titles
                ret.extend(titles)
                to_remove.extend(to_remove_c)
                break

        # Add title match in all fileparts containing the year.
        for filepart in years_fileparts:
            titles = self.check_titles_in_filepart(filepart, matches, context)
            if titles:
                # pylint:disable=unbalanced-tuple-unpacking
                titles, to_remove_c = titles
                ret.extend(titles)
                to_remove.extend(to_remove_c)

        return ret, to_remove


class TitleFromPosition(TitleBaseRule):
    """
    Add title match in existing matches
    """
    dependency = [FilmTitleRule, SubtitlePrefixLanguageRule, SubtitleSuffixLanguageRule, SubtitleExtensionRule]

    properties = {'title': [None], 'alternative_title': [None]}

    def __init__(self):
        super(TitleFromPosition, self).__init__('title', ['title'], 'alternative_title')

    def enabled(self, context):
        return not is_disabled(context, 'alternative_title')


class PreferTitleWithYear(Rule):
    """
    Prefer title where filepart contains year.
    """
    dependency = TitleFromPosition
    consequence = [RemoveMatch, AppendTags(['equivalent-ignore'])]

    properties = {'title': [None]}

    def when(self, matches, context):
        with_year_in_group = []
        with_year = []
        titles = matches.named('title')

        for title_match in titles:
            filepart = matches.markers.at_match(title_match, lambda marker: marker.name == 'path', 0)
            if filepart:
                year_match = matches.range(filepart.start, filepart.end, lambda match: match.name == 'year', 0)
                if year_match:
                    group = matches.markers.at_match(year_match, lambda m: m.name == 'group')
                    if group:
                        with_year_in_group.append(title_match)
                    else:
                        with_year.append(title_match)

        to_tag = []
        if with_year_in_group:
            title_values = {title_match.value for title_match in with_year_in_group}
            to_tag.extend(with_year_in_group)
        elif with_year:
            title_values = {title_match.value for title_match in with_year}
            to_tag.extend(with_year)
        else:
            title_values = {title_match.value for title_match in titles}

        to_remove = []
        for title_match in titles:
            if title_match.value not in title_values:
                to_remove.append(title_match)
        return to_remove, to_tag
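A sketch of title extraction from position, with an invented release name; the year in the filepart lets PreferTitleWithYear favor it:

from guessit import guessit

result = guessit('A.Fake.Movie.2018.1080p.BluRay.x264-GRP.mkv')
print(result['title'])  # 'A Fake Movie'
print(result['year'])   # 2018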
83
libs/common/guessit/rules/properties/type.py
Normal file
@ -0,0 +1,83 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
type property
"""
from rebulk import CustomRule, Rebulk, POST_PROCESS
from rebulk.match import Match

from ..common.pattern import is_disabled
from ...rules.processors import Processors


def _type(matches, value):
    """
    Define type match with given value.
    :param matches:
    :param value:
    :return:
    """
    matches.append(Match(len(matches.input_string), len(matches.input_string), name='type', value=value))


def type_(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'type'))
    rebulk = rebulk.rules(TypeProcessor)

    return rebulk


class TypeProcessor(CustomRule):
    """
    Post processor to find file type based on all other found matches.
    """
    priority = POST_PROCESS

    dependency = Processors

    properties = {'type': ['episode', 'movie']}

    def when(self, matches, context):  # pylint:disable=too-many-return-statements
        option_type = context.get('type', None)
        if option_type:
            return option_type

        episode = matches.named('episode')
        season = matches.named('season')
        absolute_episode = matches.named('absolute_episode')
        episode_details = matches.named('episode_details')

        if episode or season or episode_details or absolute_episode:
            return 'episode'

        film = matches.named('film')
        if film:
            return 'movie'

        year = matches.named('year')
        date = matches.named('date')

        if date and not year:
            return 'episode'

        bonus = matches.named('bonus')
        if bonus and not year:
            return 'episode'

        crc32 = matches.named('crc32')
        anime_release_group = matches.named('release_group', lambda match: 'anime' in match.tags)
        if crc32 and anime_release_group:
            return 'episode'

        return 'movie'

    def then(self, matches, when_response, context):
        _type(matches, when_response)
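A sketch of how TypeProcessor decides between movie and episode, and how the 'type' option short-circuits it; the names are invented:

from guessit import guessit

print(guessit('Some.Show.S02E03.720p.mkv')['type'])                # 'episode' (season/episode found)
print(guessit('Some.Movie.2018.1080p.mkv')['type'])                # 'movie' (fallback)
print(guessit('ambiguous.name.mkv', {'type': 'episode'})['type'])  # forced by the option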
125
libs/common/guessit/rules/properties/video_codec.py
Normal file
@ -0,0 +1,125 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
video_codec and video_profile property
"""
from rebulk.remodule import re

from rebulk import Rebulk, Rule, RemoveMatch

from ..common import dash
from ..common.pattern import is_disabled
from ..common.validators import seps_after, seps_before, seps_surround


def video_codec(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
    rebulk.defaults(name="video_codec",
                    tags=['source-suffix', 'streaming_service.suffix'],
                    disabled=lambda context: is_disabled(context, 'video_codec'))

    rebulk.regex(r'Rv\d{2}', value='RealVideo')
    rebulk.regex('Mpe?g-?2', '[hx]-?262', value='MPEG-2')
    rebulk.string("DVDivX", "DivX", value="DivX")
    rebulk.string('XviD', value='Xvid')
    rebulk.regex('VC-?1', value='VC-1')
    rebulk.string('VP7', value='VP7')
    rebulk.string('VP8', 'VP80', value='VP8')
    rebulk.string('VP9', value='VP9')
    rebulk.regex('[hx]-?263', value='H.263')
    rebulk.regex('[hx]-?264', '(MPEG-?4)?AVC(?:HD)?', value='H.264')
    rebulk.regex('[hx]-?265', 'HEVC', value='H.265')
    rebulk.regex('(?P<video_codec>hevc)(?P<color_depth>10)', value={'video_codec': 'H.265', 'color_depth': '10-bit'},
                 tags=['video-codec-suffix'], children=True)

    # http://blog.mediacoderhq.com/h264-profiles-and-levels/
    # https://en.wikipedia.org/wiki/H.264/MPEG-4_AVC
    rebulk.defaults(name="video_profile",
                    validator=seps_surround,
                    disabled=lambda context: is_disabled(context, 'video_profile'))

    rebulk.string('BP', value='Baseline', tags='video_profile.rule')
    rebulk.string('XP', 'EP', value='Extended', tags='video_profile.rule')
    rebulk.string('MP', value='Main', tags='video_profile.rule')
    rebulk.string('HP', 'HiP', value='High', tags='video_profile.rule')

    # https://en.wikipedia.org/wiki/Scalable_Video_Coding
    rebulk.string('SC', 'SVC', value='Scalable Video Coding', tags='video_profile.rule')
    # https://en.wikipedia.org/wiki/AVCHD
    rebulk.regex('AVC(?:HD)?', value='Advanced Video Codec High Definition', tags='video_profile.rule')
    # https://en.wikipedia.org/wiki/H.265/HEVC
    rebulk.string('HEVC', value='High Efficiency Video Coding', tags='video_profile.rule')

    rebulk.regex('Hi422P', value='High 4:2:2')
    rebulk.regex('Hi444PP', value='High 4:4:4 Predictive')
    rebulk.regex('Hi10P?', value='High 10')  # no profile validation is required

    rebulk.string('DXVA', value='DXVA', name='video_api',
                  disabled=lambda context: is_disabled(context, 'video_api'))

    rebulk.defaults(name='color_depth',
                    validator=seps_surround,
                    disabled=lambda context: is_disabled(context, 'color_depth'))
    rebulk.regex('12.?bits?', value='12-bit')
    rebulk.regex('10.?bits?', 'YUV420P10', 'Hi10P?', value='10-bit')
    rebulk.regex('8.?bits?', value='8-bit')

    rebulk.rules(ValidateVideoCodec, VideoProfileRule)

    return rebulk


class ValidateVideoCodec(Rule):
    """
    Validate video_codec with source property or separated
    """
    priority = 64
    consequence = RemoveMatch

    def enabled(self, context):
        return not is_disabled(context, 'video_codec')

    def when(self, matches, context):
        ret = []
        for codec in matches.named('video_codec'):
            if not seps_before(codec) and \
                    not matches.at_index(codec.start - 1, lambda match: 'video-codec-prefix' in match.tags):
                ret.append(codec)
                continue
            if not seps_after(codec) and \
                    not matches.at_index(codec.end + 1, lambda match: 'video-codec-suffix' in match.tags):
                ret.append(codec)
                continue
        return ret


class VideoProfileRule(Rule):
    """
    Rule to validate video_profile
    """
    consequence = RemoveMatch

    def enabled(self, context):
        return not is_disabled(context, 'video_profile')

    def when(self, matches, context):
        profile_list = matches.named('video_profile', lambda match: 'video_profile.rule' in match.tags)
        ret = []
        for profile in profile_list:
            codec = matches.at_span(profile.span, lambda match: match.name == 'video_codec', 0)
            if not codec:
                codec = matches.previous(profile, lambda match: match.name == 'video_codec')
            if not codec:
                codec = matches.next(profile, lambda match: match.name == 'video_codec')
            if not codec:
                ret.append(profile)
        return ret
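A sketch of the video_codec values above, with invented release names; the combined hevc10 pattern yields both a codec and a color depth:

from guessit import guessit

print(guessit('Some.Movie.1080p.BluRay.x264-GRP')['video_codec'])  # 'H.264'
result = guessit('Some.Show.S01E01.1080p.HEVC10-GRP.mkv')
print(result['video_codec'], result['color_depth'])  # 'H.265' '10-bit'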
106
libs/common/guessit/rules/properties/website.py
Normal file
@ -0,0 +1,106 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Website property.
"""
from pkg_resources import resource_stream  # @UnresolvedImport
from rebulk.remodule import re

from rebulk import Rebulk, Rule, RemoveMatch
from ..common import seps
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import seps_surround
from ...reutils import build_or_pattern


def website(config):
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'website'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(name="website")

    with resource_stream('guessit', 'tlds-alpha-by-domain.txt') as tld_file:
        tlds = [
            tld.strip().decode('utf-8')
            for tld in tld_file.readlines()
            if b'--' not in tld
        ][1:]  # All registered domain extensions

    safe_tlds = config['safe_tlds']  # For sure a website extension
    safe_subdomains = config['safe_subdomains']  # For sure a website subdomain
    safe_prefix = config['safe_prefixes']  # Words before a TLD that make it a sure website
    website_prefixes = config['prefixes']

    rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
                 r'\.)+(?:[a-z-]+\.)+(?:'+build_or_pattern(tlds) +
                 r'))(?:[^a-z0-9]|$)',
                 children=True)
    rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
                 r'\.)*[a-z-]+\.(?:'+build_or_pattern(safe_tlds) +
                 r'))(?:[^a-z0-9]|$)',
                 safe_subdomains=safe_subdomains, safe_tlds=safe_tlds, children=True)
    rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
                 r'\.)*[a-z-]+\.(?:'+build_or_pattern(safe_prefix) +
                 r'\.)+(?:'+build_or_pattern(tlds) +
                 r'))(?:[^a-z0-9]|$)',
                 safe_subdomains=safe_subdomains, safe_prefix=safe_prefix, tlds=tlds, children=True)

    rebulk.string(*website_prefixes,
                  validator=seps_surround, private=True, tags=['website.prefix'])

    class PreferTitleOverWebsite(Rule):
        """
        If found match is more likely a title, remove website.
        """
        consequence = RemoveMatch

        @staticmethod
        def valid_followers(match):
            """
            Validator for next website matches
            """
            return any(name in ['season', 'episode', 'year'] for name in match.names)

        def when(self, matches, context):
            to_remove = []
            for website_match in matches.named('website'):
                safe = False
                for safe_start in safe_subdomains + safe_prefix:
                    if website_match.value.lower().startswith(safe_start):
                        safe = True
                        break
                if not safe:
                    suffix = matches.next(website_match, PreferTitleOverWebsite.valid_followers, 0)
                    if suffix:
                        to_remove.append(website_match)
            return to_remove

    rebulk.rules(PreferTitleOverWebsite, ValidateWebsitePrefix)

    return rebulk


class ValidateWebsitePrefix(Rule):
    """
    Validate website prefixes
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        to_remove = []
        for prefix in matches.tagged('website.prefix'):
            website_match = matches.next(prefix, predicate=lambda match: match.name == 'website', index=0)
            if (not website_match or
                    matches.holes(prefix.end, website_match.start,
                                  formatter=cleanup, seps=seps, predicate=lambda match: match.value)):
                to_remove.append(prefix)
        return to_remove
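A sketch of website detection, assuming 'www' is among the configured safe_subdomains and 'com' is in the bundled TLD list; the domain is a placeholder:

from guessit import guessit

result = guessit('Some.Show.S01E01.720p.HDTV.x264.www.example.com.mkv')
print(result.get('website'))  # expected: 'www.example.com'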