mirror of
https://github.com/clinton-hall/nzbToMedia.git
synced 2025-08-14 02:26:53 -07:00
updated libs to fix guessit and subliminal. Fixes #1080
This commit is contained in:
parent
319d418af8
commit
0625f7f3c0
263 changed files with 28711 additions and 12615 deletions
165
libs/guessit/rules/common/numeral.py
Normal file
165
libs/guessit/rules/common/numeral.py
Normal file
|
@ -0,0 +1,165 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
parse numeral from various formats
|
||||
"""
|
||||
from rebulk.remodule import re
|
||||
|
||||
digital_numeral = r'\d{1,4}'
|
||||
|
||||
roman_numeral = r'(?=[MCDLXVI]+)M{0,4}(?:CM|CD|D?C{0,3})(?:XC|XL|L?X{0,3})(?:IX|IV|V?I{0,3})'
|
||||
|
||||
english_word_numeral_list = [
|
||||
'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten',
|
||||
'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen', 'twenty'
|
||||
]
|
||||
|
||||
french_word_numeral_list = [
|
||||
'zéro', 'un', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
|
||||
'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dix-sept', 'dix-huit', 'dix-neuf', 'vingt'
|
||||
]
|
||||
|
||||
french_alt_word_numeral_list = [
|
||||
'zero', 'une', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
|
||||
'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dixsept', 'dixhuit', 'dixneuf', 'vingt'
|
||||
]
|
||||
|
||||
|
||||
def __build_word_numeral(*args):
|
||||
"""
|
||||
Build word numeral regexp from list.
|
||||
|
||||
:param args:
|
||||
:type args:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
re_ = None
|
||||
for word_list in args:
|
||||
for word in word_list:
|
||||
if not re_:
|
||||
re_ = r'(?:(?=\w+)'
|
||||
else:
|
||||
re_ += '|'
|
||||
re_ += word
|
||||
re_ += ')'
|
||||
return re_
|
||||
|
||||
|
||||
word_numeral = __build_word_numeral(english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list)
|
||||
|
||||
numeral = '(?:' + digital_numeral + '|' + roman_numeral + '|' + word_numeral + ')'
|
||||
|
||||
__romanNumeralMap = (
|
||||
('M', 1000),
|
||||
('CM', 900),
|
||||
('D', 500),
|
||||
('CD', 400),
|
||||
('C', 100),
|
||||
('XC', 90),
|
||||
('L', 50),
|
||||
('XL', 40),
|
||||
('X', 10),
|
||||
('IX', 9),
|
||||
('V', 5),
|
||||
('IV', 4),
|
||||
('I', 1)
|
||||
)
|
||||
|
||||
__romanNumeralPattern = re.compile('^' + roman_numeral + '$')
|
||||
|
||||
|
||||
def __parse_roman(value):
|
||||
"""
|
||||
convert Roman numeral to integer
|
||||
|
||||
:param value: Value to parse
|
||||
:type value: string
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
if not __romanNumeralPattern.search(value):
|
||||
raise ValueError('Invalid Roman numeral: %s' % value)
|
||||
|
||||
result = 0
|
||||
index = 0
|
||||
for num, integer in __romanNumeralMap:
|
||||
while value[index:index + len(num)] == num:
|
||||
result += integer
|
||||
index += len(num)
|
||||
return result
|
||||
|
||||
|
||||
def __parse_word(value):
|
||||
"""
|
||||
Convert Word numeral to integer
|
||||
|
||||
:param value: Value to parse
|
||||
:type value: string
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
for word_list in [english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list]:
|
||||
try:
|
||||
return word_list.index(value.lower())
|
||||
except ValueError:
|
||||
pass
|
||||
raise ValueError # pragma: no cover
|
||||
|
||||
|
||||
_clean_re = re.compile(r'[^\d]*(\d+)[^\d]*')
|
||||
|
||||
|
||||
def parse_numeral(value, int_enabled=True, roman_enabled=True, word_enabled=True, clean=True):
|
||||
"""
|
||||
Parse a numeric value into integer.
|
||||
|
||||
:param value: Value to parse. Can be an integer, roman numeral or word.
|
||||
:type value: string
|
||||
:param int_enabled:
|
||||
:type int_enabled:
|
||||
:param roman_enabled:
|
||||
:type roman_enabled:
|
||||
:param word_enabled:
|
||||
:type word_enabled:
|
||||
:param clean:
|
||||
:type clean:
|
||||
:return: Numeric value, or None if value can't be parsed
|
||||
:rtype: int
|
||||
"""
|
||||
# pylint: disable=too-many-branches
|
||||
if int_enabled:
|
||||
try:
|
||||
if clean:
|
||||
match = _clean_re.match(value)
|
||||
if match:
|
||||
clean_value = match.group(1)
|
||||
return int(clean_value)
|
||||
return int(value)
|
||||
except ValueError:
|
||||
pass
|
||||
if roman_enabled:
|
||||
try:
|
||||
if clean:
|
||||
for word in value.split():
|
||||
try:
|
||||
return __parse_roman(word.upper())
|
||||
except ValueError:
|
||||
pass
|
||||
return __parse_roman(value)
|
||||
except ValueError:
|
||||
pass
|
||||
if word_enabled:
|
||||
try:
|
||||
if clean:
|
||||
for word in value.split():
|
||||
try:
|
||||
return __parse_word(word)
|
||||
except ValueError: # pragma: no cover
|
||||
pass
|
||||
return __parse_word(value) # pragma: no cover
|
||||
except ValueError: # pragma: no cover
|
||||
pass
|
||||
raise ValueError('Invalid numeral: ' + value) # pragma: no cover
|
Loading…
Add table
Add a link
Reference in a new issue