mirror of
https://github.com/clinton-hall/nzbToMedia.git
synced 2025-07-16 02:02:53 -07:00
updated libs to fix guessit and subliminal. Fixes #1080
This commit is contained in:
parent
319d418af8
commit
0625f7f3c0
263 changed files with 28711 additions and 12615 deletions
136
libs/guessit/rules/common/formatters.py
Normal file
136
libs/guessit/rules/common/formatters.py
Normal file
|
@ -0,0 +1,136 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Formatters
|
||||
"""
|
||||
from rebulk.formatters import formatters
|
||||
from rebulk.remodule import re
|
||||
from . import seps
|
||||
|
||||
_excluded_clean_chars = ',:;-/\\'
|
||||
clean_chars = ""
|
||||
for sep in seps:
|
||||
if sep not in _excluded_clean_chars:
|
||||
clean_chars += sep
|
||||
|
||||
|
||||
def _potential_before(i, input_string):
|
||||
"""
|
||||
Check if the character at position i can be a potential single char separator considering what's before it.
|
||||
|
||||
:param i:
|
||||
:type i: int
|
||||
:param input_string:
|
||||
:type input_string: str
|
||||
:return:
|
||||
:rtype: bool
|
||||
"""
|
||||
return i - 2 >= 0 and input_string[i] == input_string[i - 2] and input_string[i - 1] not in seps
|
||||
|
||||
|
||||
def _potential_after(i, input_string):
|
||||
"""
|
||||
Check if the character at position i can be a potential single char separator considering what's after it.
|
||||
|
||||
:param i:
|
||||
:type i: int
|
||||
:param input_string:
|
||||
:type input_string: str
|
||||
:return:
|
||||
:rtype: bool
|
||||
"""
|
||||
return i + 2 >= len(input_string) or \
|
||||
input_string[i + 2] == input_string[i] and input_string[i + 1] not in seps
|
||||
|
||||
|
||||
def cleanup(input_string):
|
||||
"""
|
||||
Removes and strip separators from input_string (but keep ',;' characters)
|
||||
|
||||
It also keep separators for single characters (Mavels Agents of S.H.I.E.L.D.)
|
||||
|
||||
:param input_string:
|
||||
:type input_string: str
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
clean_string = input_string
|
||||
for char in clean_chars:
|
||||
clean_string = clean_string.replace(char, ' ')
|
||||
|
||||
# Restore input separator if they separate single characters.
|
||||
# Useful for Mavels Agents of S.H.I.E.L.D.
|
||||
# https://github.com/guessit-io/guessit/issues/278
|
||||
|
||||
indices = [i for i, letter in enumerate(clean_string) if letter in seps]
|
||||
|
||||
dots = set()
|
||||
if indices:
|
||||
clean_list = list(clean_string)
|
||||
|
||||
potential_indices = []
|
||||
|
||||
for i in indices:
|
||||
if _potential_before(i, input_string) and _potential_after(i, input_string):
|
||||
potential_indices.append(i)
|
||||
|
||||
replace_indices = []
|
||||
|
||||
for potential_index in potential_indices:
|
||||
if potential_index - 2 in potential_indices or potential_index + 2 in potential_indices:
|
||||
replace_indices.append(potential_index)
|
||||
|
||||
if replace_indices:
|
||||
for replace_index in replace_indices:
|
||||
dots.add(input_string[replace_index])
|
||||
clean_list[replace_index] = input_string[replace_index]
|
||||
clean_string = ''.join(clean_list)
|
||||
|
||||
clean_string = strip(clean_string, ''.join([c for c in seps if c not in dots]))
|
||||
|
||||
clean_string = re.sub(' +', ' ', clean_string)
|
||||
return clean_string
|
||||
|
||||
|
||||
def strip(input_string, chars=seps):
|
||||
"""
|
||||
Strip separators from input_string
|
||||
:param input_string:
|
||||
:param chars:
|
||||
:type input_string:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
return input_string.strip(chars)
|
||||
|
||||
|
||||
def raw_cleanup(raw):
|
||||
"""
|
||||
Cleanup a raw value to perform raw comparison
|
||||
:param raw:
|
||||
:type raw:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
return formatters(cleanup, strip)(raw.lower())
|
||||
|
||||
|
||||
def reorder_title(title, articles=('the',), separators=(',', ', ')):
|
||||
"""
|
||||
Reorder the title
|
||||
:param title:
|
||||
:type title:
|
||||
:param articles:
|
||||
:type articles:
|
||||
:param separators:
|
||||
:type separators:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
ltitle = title.lower()
|
||||
for article in articles:
|
||||
for separator in separators:
|
||||
suffix = separator + article
|
||||
if ltitle[-len(suffix):] == suffix:
|
||||
return title[-len(suffix) + len(separator):] + ' ' + title[:-len(suffix)]
|
||||
return title
|
Loading…
Add table
Add a link
Reference in a new issue