Switched out the guessit lib for the one CP uses; it seems to have fewer dependencies.
Parent: a6cd0f156b
Commit: 6fea9ddb40
65 changed files with 2034 additions and 7313 deletions
|
@ -2,7 +2,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
|
@ -18,11 +18,9 @@
|
|||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
import pkg_resources
|
||||
from .__version__ import __version__
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '0.6.2'
|
||||
__all__ = ['Guess', 'Language',
|
||||
'guess_file_info', 'guess_video_info',
|
||||
'guess_movie_info', 'guess_episode_info']
|
||||
|
@ -32,69 +30,58 @@ __all__ = ['Guess', 'Language',
|
|||
# it will then always be available
|
||||
# with code from http://lucumr.pocoo.org/2011/1/22/forwards-compatible-python/
|
||||
import sys
|
||||
if sys.version_info[0] >= 3: # pragma: no cover
|
||||
PY2, PY3 = False, True
|
||||
if sys.version_info[0] >= 3:
|
||||
PY3 = True
|
||||
unicode_text_type = str
|
||||
native_text_type = str
|
||||
base_text_type = str
|
||||
|
||||
def u(x):
|
||||
return str(x)
|
||||
|
||||
def s(x):
|
||||
return x
|
||||
|
||||
class UnicodeMixin(object):
|
||||
__str__ = lambda x: x.__unicode__()
|
||||
import binascii
|
||||
|
||||
def to_hex(x):
|
||||
return binascii.hexlify(x).decode('utf-8')
|
||||
|
||||
else: # pragma: no cover
|
||||
PY2, PY3 = True, False
|
||||
__all__ = [str(s) for s in __all__] # fix imports for python2
|
||||
else:
|
||||
PY3 = False
|
||||
__all__ = [ str(s) for s in __all__ ] # fix imports for python2
|
||||
unicode_text_type = unicode
|
||||
native_text_type = str
|
||||
base_text_type = basestring
|
||||
|
||||
def u(x):
|
||||
if isinstance(x, str):
|
||||
return x.decode('utf-8')
|
||||
if isinstance(x, list):
|
||||
return [u(s) for s in x]
|
||||
return unicode(x)
|
||||
|
||||
def s(x):
|
||||
if isinstance(x, unicode):
|
||||
return x.encode('utf-8')
|
||||
if isinstance(x, list):
|
||||
return [s(y) for y in x]
|
||||
return [ s(y) for y in x ]
|
||||
if isinstance(x, tuple):
|
||||
return tuple(s(y) for y in x)
|
||||
if isinstance(x, dict):
|
||||
return dict((s(key), s(value)) for key, value in x.items())
|
||||
return x
|
||||
|
||||
class UnicodeMixin(object):
|
||||
__str__ = lambda x: unicode(x).encode('utf-8')
|
||||
|
||||
def to_hex(x):
|
||||
return x.encode('hex')
|
||||
|
||||
range = xrange
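As a purely illustrative sketch (Python 2 semantics; the byte string and dict below are made-up values), the helpers defined above behave roughly like this:

    # u() coerces byte strings (and lists of them) to unicode;
    # s() converts unicode back to byte strings, recursing into lists, tuples and dicts.
    title = u(b'Dark.City.1998.avi')        # -> u'Dark.City.1998.avi'
    raw = s({u'title': u'Dark City'})       # -> {'title': 'Dark City'} as utf-8 byte strings
    hexed = to_hex(b'\x01\x02')             # -> '0102'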
|
||||
|
||||
from guessit.guess import Guess, merge_all
|
||||
from guessit.language import Language
|
||||
from guessit.matcher import IterativeMatcher
|
||||
from guessit.textutils import clean_string, is_camel, from_camel
|
||||
import os.path
|
||||
from guessit.textutils import clean_string
|
||||
import logging
|
||||
import json
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
||||
class NullHandler(logging.Handler):
|
||||
def emit(self, record):
|
||||
pass
|
||||
|
@ -104,74 +91,137 @@ h = NullHandler()
|
|||
log.addHandler(h)
|
||||
|
||||
|
||||
def _guess_filename(filename, options=None, **kwargs):
|
||||
mtree = _build_filename_mtree(filename, options=options, **kwargs)
|
||||
_add_camel_properties(mtree, options=options)
|
||||
return mtree.matched()
|
||||
def _guess_filename(filename, filetype):
|
||||
def find_nodes(tree, props):
|
||||
"""Yields all nodes containing any of the given props."""
|
||||
if isinstance(props, base_text_type):
|
||||
props = [props]
|
||||
for node in tree.nodes():
|
||||
if any(prop in node.guess for prop in props):
|
||||
yield node
|
||||
|
||||
def warning(title):
|
||||
log.warning('%s, guesses: %s - %s' % (title, m.nice_string(), m2.nice_string()))
|
||||
return m
|
||||
|
||||
mtree = IterativeMatcher(filename, filetype=filetype)
|
||||
|
||||
m = mtree.matched()
|
||||
|
||||
second_pass_opts = []
|
||||
second_pass_transfo_opts = {}
|
||||
|
||||
# if there are multiple possible years found, we assume the first one is
|
||||
# part of the title, reparse the tree taking this into account
|
||||
years = set(n.value for n in find_nodes(mtree.match_tree, 'year'))
|
||||
if len(years) >= 2:
|
||||
second_pass_opts.append('skip_first_year')
|
||||
|
||||
to_skip_language_nodes = []
|
||||
|
||||
title_nodes = set(n for n in find_nodes(mtree.match_tree, ['title', 'series']))
|
||||
title_spans = {}
|
||||
for title_node in title_nodes:
|
||||
title_spans[title_node.span[0]] = title_node
|
||||
title_spans[title_node.span[1]] = title_node
|
||||
|
||||
for lang_key in ('language', 'subtitleLanguage'):
|
||||
langs = {}
|
||||
lang_nodes = set(n for n in find_nodes(mtree.match_tree, lang_key))
|
||||
|
||||
for lang_node in lang_nodes:
|
||||
lang = lang_node.guess.get(lang_key, None)
|
||||
if len(lang_node.value) > 3 and (lang_node.span[0] in title_spans.keys() or lang_node.span[1] in title_spans.keys()):
|
||||
# Language is next to or just before the title, and is not a language code. Add to skip for 2nd pass.
|
||||
|
||||
# if filetype is subtitle and the language appears last, just before
|
||||
# the extension, then it is likely a subtitle language
|
||||
parts = clean_string(lang_node.root.value).split()
|
||||
if m['type'] in ['moviesubtitle', 'episodesubtitle'] and (parts.index(lang_node.value) == len(parts) - 2):
|
||||
continue
|
||||
|
||||
to_skip_language_nodes.append(lang_node)
|
||||
elif not lang in langs:
|
||||
langs[lang] = lang_node
|
||||
else:
|
||||
# The same language was found. Keep the more confident one, and add others to skip for 2nd pass.
|
||||
existing_lang_node = langs[lang]
|
||||
to_skip = None
|
||||
if existing_lang_node.guess.confidence('language') >= lang_node.guess.confidence('language'):
|
||||
# lang_node is to remove
|
||||
to_skip = lang_node
|
||||
else:
|
||||
# existing_lang_node is to remove
|
||||
langs[lang] = lang_node
|
||||
to_skip = existing_lang_node
|
||||
to_skip_language_nodes.append(to_skip)
|
||||
|
||||
|
||||
def _build_filename_mtree(filename, options=None, **kwargs):
|
||||
mtree = IterativeMatcher(filename, options=options, **kwargs)
|
||||
second_pass_options = mtree.second_pass_options
|
||||
if second_pass_options:
|
||||
log.info("Running 2nd pass")
|
||||
merged_options = dict(options)
|
||||
merged_options.update(second_pass_options)
|
||||
mtree = IterativeMatcher(filename, options=merged_options, **kwargs)
|
||||
return mtree
|
||||
if to_skip_language_nodes:
|
||||
second_pass_transfo_opts['guess_language'] = (
|
||||
((), { 'skip': [ { 'node_idx': node.parent.node_idx,
|
||||
'span': node.span }
|
||||
for node in to_skip_language_nodes ] }))
|
||||
|
||||
if second_pass_opts or second_pass_transfo_opts:
|
||||
# 2nd pass is needed
|
||||
log.info("Running 2nd pass with options: %s" % second_pass_opts)
|
||||
log.info("Transfo options: %s" % second_pass_transfo_opts)
|
||||
mtree = IterativeMatcher(filename, filetype=filetype,
|
||||
opts=second_pass_opts,
|
||||
transfo_opts=second_pass_transfo_opts)
|
||||
|
||||
m = mtree.matched()
|
||||
|
||||
if 'language' not in m and 'subtitleLanguage' not in m or 'title' not in m:
|
||||
return m
|
||||
|
||||
# if we found some language, make sure we didn't cut a title or sth...
|
||||
mtree2 = IterativeMatcher(filename, filetype=filetype,
|
||||
opts=['nolanguage', 'nocountry'])
|
||||
m2 = mtree2.matched()
|
||||
|
||||
if m.get('title') != m2.get('title'):
|
||||
title = next(find_nodes(mtree.match_tree, 'title'))
|
||||
title2 = next(find_nodes(mtree2.match_tree, 'title'))
|
||||
|
||||
# if a node is in an explicit group, then the correct title is probably
|
||||
# the other one
|
||||
if title.root.node_at(title.node_idx[:2]).is_explicit():
|
||||
return m2
|
||||
elif title2.root.node_at(title2.node_idx[:2]).is_explicit():
|
||||
return m
|
||||
|
||||
return m
|
||||
|
||||
|
||||
def _add_camel_properties(mtree, options=None, **kwargs):
|
||||
prop = 'title' if mtree.matched().get('type') != 'episode' else 'series'
|
||||
value = mtree.matched().get(prop)
|
||||
_guess_camel_string(mtree, value, options=options, skip_title=False, **kwargs)
|
||||
|
||||
for leaf in mtree.match_tree.unidentified_leaves():
|
||||
value = leaf.value
|
||||
_guess_camel_string(mtree, value, options=options, skip_title=True, **kwargs)
|
||||
|
||||
|
||||
def _guess_camel_string(mtree, string, options=None, skip_title=False, **kwargs):
|
||||
if string and is_camel(string):
|
||||
log.info('"%s" is camel cased. Try to detect more properties.' % (string,))
|
||||
uncameled_value = from_camel(string)
|
||||
camel_tree = _build_filename_mtree(uncameled_value, options=options, name_only=True, skip_title=skip_title, **kwargs)
|
||||
if len(camel_tree.matched()) > 0:
|
||||
# Title has changed.
|
||||
mtree.matched().update(camel_tree.matched())
|
||||
return True
|
||||
return False
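For illustration, the camel-case pass above relies on the textutils helpers imported at the top of this module; assuming they behave as their names suggest, the effect is roughly:

    # a camel-cased leaf such as a one-word title is expanded and re-parsed
    if is_camel('TheBigBangTheory'):
        print(from_camel('TheBigBangTheory'))   # e.g. 'The big bang theory'
    # the re-parsed words can then contribute extra properties to the match tree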
|
||||
|
||||
|
||||
def guess_file_info(filename, info=None, options=None, **kwargs):
|
||||
def guess_file_info(filename, filetype, info=None):
|
||||
"""info can contain the names of the various plugins, such as 'filename' to
|
||||
detect filename info, or 'hash_md5' to get the md5 hash of the file.
|
||||
|
||||
>>> testfile = os.path.join(os.path.dirname(__file__), 'test/dummy.srt')
|
||||
>>> g = guess_file_info(testfile, info = ['hash_md5', 'hash_sha1'])
|
||||
>>> g['hash_md5'], g['hash_sha1']
|
||||
('64de6b5893cac24456c46a935ef9c359', 'a703fc0fa4518080505809bf562c6fc6f7b3c98c')
|
||||
>>> guess_file_info('tests/dummy.srt', 'autodetect', info = ['hash_md5', 'hash_sha1'])
|
||||
{'hash_md5': 'e781de9b94ba2753a8e2945b2c0a123d', 'hash_sha1': 'bfd18e2f4e5d59775c2bc14d80f56971891ed620'}
|
||||
"""
|
||||
info = info or 'filename'
|
||||
options = options or {}
|
||||
|
||||
result = []
|
||||
hashers = []
|
||||
|
||||
# Force unicode as soon as possible
|
||||
filename = u(filename)
|
||||
|
||||
if info is None:
|
||||
info = ['filename']
|
||||
|
||||
if isinstance(info, base_text_type):
|
||||
info = [info]
|
||||
|
||||
for infotype in info:
|
||||
if infotype == 'filename':
|
||||
result.append(_guess_filename(filename, options, **kwargs))
|
||||
result.append(_guess_filename(filename, filetype))
|
||||
|
||||
elif infotype == 'hash_mpc':
|
||||
from guessit.hash_mpc import hash_file
|
||||
try:
|
||||
result.append(Guess({infotype: hash_file(filename)},
|
||||
result.append(Guess({'hash_mpc': hash_file(filename)},
|
||||
confidence=1.0))
|
||||
except Exception as e:
|
||||
log.warning('Could not compute MPC-style hash because: %s' % e)
|
||||
|
@ -179,7 +229,7 @@ def guess_file_info(filename, info=None, options=None, **kwargs):
|
|||
elif infotype == 'hash_ed2k':
|
||||
from guessit.hash_ed2k import hash_file
|
||||
try:
|
||||
result.append(Guess({infotype: hash_file(filename)},
|
||||
result.append(Guess({'hash_ed2k': hash_file(filename)},
|
||||
confidence=1.0))
|
||||
except Exception as e:
|
||||
log.warning('Could not compute ed2k hash because: %s' % e)
|
||||
|
@ -217,16 +267,23 @@ def guess_file_info(filename, info=None, options=None, **kwargs):
|
|||
|
||||
result = merge_all(result)
|
||||
|
||||
# last minute adjustments
|
||||
|
||||
# if country is in the guessed properties, make it part of the series name
|
||||
if 'series' in result and 'country' in result:
|
||||
result['series'] += ' (%s)' % result['country'].alpha2.upper()
|
||||
|
||||
|
||||
return result
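As a hedged usage sketch, using the two-argument form added in this commit (the filename is taken from the demo list further down; the exact set of keys depends on the matcher and filetype):

    guess = guess_file_info('Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.avi',
                            'episode', info=['filename'])
    # typically yields something like:
    # {'type': 'episode', 'series': 'Treme', 'season': 1, 'episodeNumber': 3,
    #  'title': 'Right Place, Wrong Time', 'format': 'HDTV', 'videoCodec': 'XviD',
    #  'releaseGroup': 'NoTV', 'container': 'avi'}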
|
||||
|
||||
|
||||
def guess_video_info(filename, info=None, options=None, **kwargs):
|
||||
return guess_file_info(filename, info=info, options=options, type='video', **kwargs)
|
||||
def guess_video_info(filename, info=None):
|
||||
return guess_file_info(filename, 'autodetect', info)
|
||||
|
||||
|
||||
def guess_movie_info(filename, info=None, options=None, **kwargs):
|
||||
return guess_file_info(filename, info=info, options=options, type='movie', **kwargs)
|
||||
def guess_movie_info(filename, info=None):
|
||||
return guess_file_info(filename, 'movie', info)
|
||||
|
||||
|
||||
def guess_episode_info(filename, info=None, options=None, **kwargs):
|
||||
return guess_file_info(filename, info=info, options=options, type='episode', **kwargs)
|
||||
def guess_episode_info(filename, info=None):
|
||||
return guess_file_info(filename, 'episode', info)
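The wrappers above just preset the filetype, e.g. (hypothetical filenames):

    guess_movie_info('Dark.City.1998.DC.BDRip.720p.mkv')         # guess_file_info(..., 'movie')
    guess_episode_info('Californication.2x05.HDTV.XviD-0TV.avi')  # guess_file_info(..., 'episode')
    guess_video_info('some.download.avi')                         # guess_file_info(..., 'autodetect')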
@ -2,8 +2,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
|
||||
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
|
@ -19,199 +18,109 @@
|
|||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import print_function
|
||||
from guessit import u
|
||||
from guessit import slogging, guess_file_info
|
||||
from optparse import OptionParser
|
||||
import logging
|
||||
import sys
|
||||
import os
|
||||
|
||||
from guessit import PY2, u, guess_file_info
|
||||
from guessit.options import option_parser
|
||||
import locale
|
||||
|
||||
|
||||
def guess_file(filename, info='filename', options=None, **kwargs):
|
||||
options = options or {}
|
||||
def detect_filename(filename, filetype, info=['filename'], advanced = False):
|
||||
filename = u(filename)
|
||||
|
||||
print('For:', filename)
|
||||
guess = guess_file_info(filename, info, options, **kwargs)
|
||||
if options.get('yaml'):
|
||||
try:
|
||||
import yaml
|
||||
for k, v in guess.items():
|
||||
if isinstance(v, list) and len(v) == 1:
|
||||
guess[k] = v[0]
|
||||
ystr = yaml.safe_dump({filename: dict(guess)}, default_flow_style=False)
|
||||
i = 0
|
||||
for yline in ystr.splitlines():
|
||||
if i == 0:
|
||||
print("? " + yline[:-1])
|
||||
elif i == 1:
|
||||
print(":" + yline[1:])
|
||||
else:
|
||||
print(yline)
|
||||
i = i + 1
|
||||
return
|
||||
except ImportError: # pragma: no cover
|
||||
print('PyYAML not found. Using default output.')
|
||||
print('GuessIt found:', guess.nice_string(options.get('advanced')))
|
||||
print('GuessIt found:', guess_file_info(filename, filetype, info).nice_string(advanced))
|
||||
|
||||
|
||||
def _supported_properties():
|
||||
from guessit.plugins import transformers
|
||||
|
||||
all_properties = {}
|
||||
transformers_properties = []
|
||||
for transformer in transformers.all_transformers():
|
||||
supported_properties = transformer.supported_properties()
|
||||
transformers_properties.append((transformer, supported_properties))
|
||||
|
||||
if isinstance(supported_properties, dict):
|
||||
for property_name, possible_values in supported_properties.items():
|
||||
current_possible_values = all_properties.get(property_name)
|
||||
if current_possible_values is None:
|
||||
current_possible_values = []
|
||||
all_properties[property_name] = current_possible_values
|
||||
if possible_values:
|
||||
current_possible_values.extend(possible_values)
|
||||
else:
|
||||
for property_name in supported_properties:
|
||||
current_possible_values = all_properties.get(property_name)
|
||||
if current_possible_values is None:
|
||||
current_possible_values = []
|
||||
all_properties[property_name] = current_possible_values
|
||||
|
||||
return (all_properties, transformers_properties)
|
||||
|
||||
|
||||
def display_transformers():
|
||||
print('GuessIt transformers:')
|
||||
_, transformers_properties = _supported_properties()
|
||||
for transformer, _ in transformers_properties:
|
||||
print('[@] %s (%s)' % (transformer.name, transformer.priority))
|
||||
|
||||
|
||||
def display_properties(values, transformers):
|
||||
print('GuessIt properties:')
|
||||
all_properties, transformers_properties = _supported_properties()
|
||||
if transformers:
|
||||
for transformer, properties_list in transformers_properties:
|
||||
print('[@] %s (%s)' % (transformer.name, transformer.priority))
|
||||
for property_name in properties_list:
|
||||
property_values = all_properties.get(property_name)
|
||||
print(' [+] %s' % (property_name,))
|
||||
if property_values and values:
|
||||
_display_property_values(property_name, indent=4)
|
||||
else:
|
||||
properties_list = []
|
||||
properties_list.extend(all_properties.keys())
|
||||
properties_list.sort()
|
||||
for property_name in properties_list:
|
||||
property_values = all_properties.get(property_name)
|
||||
print(' [+] %s' % (property_name,))
|
||||
if property_values and values:
|
||||
_display_property_values(property_name, indent=4)
|
||||
|
||||
|
||||
def _display_property_values(property_name, indent=2):
|
||||
all_properties, _ = _supported_properties()
|
||||
property_values = all_properties.get(property_name)
|
||||
for property_value in property_values:
|
||||
print(indent * ' ' + '[!] %s' % (property_value,))
|
||||
|
||||
|
||||
def run_demo(episodes=True, movies=True, options=None):
|
||||
def run_demo(episodes=True, movies=True, advanced=False):
|
||||
# NOTE: tests should not be added here but rather in the tests/ folder
|
||||
# this is just intended as a quick example
|
||||
if episodes:
|
||||
testeps = ['Series/Californication/Season 2/Californication.2x05.Vaginatown.HDTV.XviD-0TV.[tvu.org.ru].avi',
|
||||
'Series/dexter/Dexter.5x02.Hello,.Bandit.ENG.-.sub.FR.HDTV.XviD-AlFleNi-TeaM.[tvu.org.ru].avi',
|
||||
'Series/Treme/Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.[tvu.org.ru].avi',
|
||||
'Series/Duckman/Duckman - 101 (01) - 20021107 - I, Duckman.avi',
|
||||
'Series/Duckman/Duckman - S1E13 Joking The Chicken (unedited).avi',
|
||||
'Series/Simpsons/The_simpsons_s13e18_-_i_am_furious_yellow.mpg',
|
||||
'Series/Simpsons/Saison 12 Français/Simpsons,.The.12x08.A.Bas.Le.Sergent.Skinner.FR.[tvu.org.ru].avi',
|
||||
'Series/Dr._Slump_-_002_DVB-Rip_Catalan_by_kelf.avi',
|
||||
'Series/Kaamelott/Kaamelott - Livre V - Second Volet - HD 704x396 Xvid 2 pass - Son 5.1 - TntRip by Slurm.avi'
|
||||
]
|
||||
testeps = [ 'Series/Californication/Season 2/Californication.2x05.Vaginatown.HDTV.XviD-0TV.[tvu.org.ru].avi',
|
||||
'Series/dexter/Dexter.5x02.Hello,.Bandit.ENG.-.sub.FR.HDTV.XviD-AlFleNi-TeaM.[tvu.org.ru].avi',
|
||||
'Series/Treme/Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.[tvu.org.ru].avi',
|
||||
'Series/Duckman/Duckman - 101 (01) - 20021107 - I, Duckman.avi',
|
||||
'Series/Duckman/Duckman - S1E13 Joking The Chicken (unedited).avi',
|
||||
'Series/Simpsons/The_simpsons_s13e18_-_i_am_furious_yellow.mpg',
|
||||
'Series/Simpsons/Saison 12 Français/Simpsons,.The.12x08.A.Bas.Le.Sergent.Skinner.FR.[tvu.org.ru].avi',
|
||||
'Series/Dr._Slump_-_002_DVB-Rip_Catalan_by_kelf.avi',
|
||||
'Series/Kaamelott/Kaamelott - Livre V - Second Volet - HD 704x396 Xvid 2 pass - Son 5.1 - TntRip by Slurm.avi'
|
||||
]
|
||||
|
||||
for f in testeps:
|
||||
print('-' * 80)
|
||||
guess_file(f, options=options, type='episode')
|
||||
print('-'*80)
|
||||
detect_filename(f, filetype='episode', advanced=advanced)
|
||||
|
||||
|
||||
if movies:
|
||||
testmovies = ['Movies/Fear and Loathing in Las Vegas (1998)/Fear.and.Loathing.in.Las.Vegas.720p.HDDVD.DTS.x264-ESiR.mkv',
|
||||
'Movies/El Dia de la Bestia (1995)/El.dia.de.la.bestia.DVDrip.Spanish.DivX.by.Artik[SEDG].avi',
|
||||
'Movies/Blade Runner (1982)/Blade.Runner.(1982).(Director\'s.Cut).CD1.DVDRip.XviD.AC3-WAF.avi',
|
||||
'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv',
|
||||
'Movies/Sin City (BluRay) (2005)/Sin.City.2005.BDRip.720p.x264.AC3-SEPTiC.mkv',
|
||||
'Movies/Borat (2006)/Borat.(2006).R5.PROPER.REPACK.DVDRip.XviD-PUKKA.avi', # FIXME: PROPER and R5 get overwritten
|
||||
'[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv', # FIXME: title gets overwritten
|
||||
'Battle Royale (2000)/Battle.Royale.(Batoru.Rowaiaru).(2000).(Special.Edition).CD1of2.DVDRiP.XviD-[ZeaL].avi',
|
||||
'Movies/Brazil (1985)/Brazil_Criterion_Edition_(1985).CD2.English.srt',
|
||||
'Movies/Persepolis (2007)/[XCT] Persepolis [H264+Aac-128(Fr-Eng)+ST(Fr-Eng)+Ind].mkv',
|
||||
'Movies/Toy Story (1995)/Toy Story [HDTV 720p English-Spanish].mkv',
|
||||
'Movies/Pirates of the Caribbean: The Curse of the Black Pearl (2003)/Pirates.Of.The.Carribean.DC.2003.iNT.DVDRip.XviD.AC3-NDRT.CD1.avi',
|
||||
'Movies/Office Space (1999)/Office.Space.[Dual-DVDRip].[Spanish-English].[XviD-AC3-AC3].[by.Oswald].avi',
|
||||
'Movies/The NeverEnding Story (1984)/The.NeverEnding.Story.1.1984.DVDRip.AC3.Xvid-Monteque.avi',
|
||||
'Movies/Juno (2007)/Juno KLAXXON.avi',
|
||||
'Movies/Chat noir, chat blanc (1998)/Chat noir, Chat blanc - Emir Kusturica (VO - VF - sub FR - Chapters).mkv',
|
||||
'Movies/Wild Zero (2000)/Wild.Zero.DVDivX-EPiC.srt',
|
||||
'Movies/El Bosque Animado (1987)/El.Bosque.Animado.[Jose.Luis.Cuerda.1987].[Xvid-Dvdrip-720x432].avi',
|
||||
'testsmewt_bugs/movies/Baraka_Edition_Collector.avi'
|
||||
]
|
||||
testmovies = [ 'Movies/Fear and Loathing in Las Vegas (1998)/Fear.and.Loathing.in.Las.Vegas.720p.HDDVD.DTS.x264-ESiR.mkv',
|
||||
'Movies/El Dia de la Bestia (1995)/El.dia.de.la.bestia.DVDrip.Spanish.DivX.by.Artik[SEDG].avi',
|
||||
'Movies/Blade Runner (1982)/Blade.Runner.(1982).(Director\'s.Cut).CD1.DVDRip.XviD.AC3-WAF.avi',
|
||||
'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv',
|
||||
'Movies/Sin City (BluRay) (2005)/Sin.City.2005.BDRip.720p.x264.AC3-SEPTiC.mkv',
|
||||
'Movies/Borat (2006)/Borat.(2006).R5.PROPER.REPACK.DVDRip.XviD-PUKKA.avi', # FIXME: PROPER and R5 get overwritten
|
||||
'[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv', # FIXME: title gets overwritten
|
||||
'Battle Royale (2000)/Battle.Royale.(Batoru.Rowaiaru).(2000).(Special.Edition).CD1of2.DVDRiP.XviD-[ZeaL].avi',
|
||||
'Movies/Brazil (1985)/Brazil_Criterion_Edition_(1985).CD2.English.srt',
|
||||
'Movies/Persepolis (2007)/[XCT] Persepolis [H264+Aac-128(Fr-Eng)+ST(Fr-Eng)+Ind].mkv',
|
||||
'Movies/Toy Story (1995)/Toy Story [HDTV 720p English-Spanish].mkv',
|
||||
'Movies/Pirates of the Caribbean: The Curse of the Black Pearl (2003)/Pirates.Of.The.Carribean.DC.2003.iNT.DVDRip.XviD.AC3-NDRT.CD1.avi',
|
||||
'Movies/Office Space (1999)/Office.Space.[Dual-DVDRip].[Spanish-English].[XviD-AC3-AC3].[by.Oswald].avi',
|
||||
'Movies/The NeverEnding Story (1984)/The.NeverEnding.Story.1.1984.DVDRip.AC3.Xvid-Monteque.avi',
|
||||
'Movies/Juno (2007)/Juno KLAXXON.avi',
|
||||
'Movies/Chat noir, chat blanc (1998)/Chat noir, Chat blanc - Emir Kusturica (VO - VF - sub FR - Chapters).mkv',
|
||||
'Movies/Wild Zero (2000)/Wild.Zero.DVDivX-EPiC.srt',
|
||||
'Movies/El Bosque Animado (1987)/El.Bosque.Animado.[Jose.Luis.Cuerda.1987].[Xvid-Dvdrip-720x432].avi',
|
||||
'testsmewt_bugs/movies/Baraka_Edition_Collector.avi'
|
||||
]
|
||||
|
||||
for f in testmovies:
|
||||
print('-' * 80)
|
||||
guess_file(f, options=options, type='movie')
|
||||
print('-'*80)
|
||||
detect_filename(f, filetype = 'movie', advanced = advanced)
|
||||
|
||||
|
||||
def main(args=None, setup_logging=True):
|
||||
if setup_logging:
|
||||
from guessit import slogging
|
||||
slogging.setupLogging()
|
||||
def main():
|
||||
slogging.setupLogging()
|
||||
|
||||
if PY2: # pragma: no cover
|
||||
import codecs
|
||||
import locale
|
||||
import sys
|
||||
# see http://bugs.python.org/issue2128
|
||||
if sys.version_info.major < 3 and os.name == 'nt':
|
||||
for i, a in enumerate(sys.argv):
|
||||
sys.argv[i] = a.decode(locale.getpreferredencoding())
|
||||
|
||||
parser = OptionParser(usage = 'usage: %prog [options] file1 [file2...]')
|
||||
parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
|
||||
help = 'display debug output')
|
||||
parser.add_option('-i', '--info', dest = 'info', default = 'filename',
|
||||
help = 'the desired information type: filename, hash_mpc or a hash from python\'s '
|
||||
'hashlib module, such as hash_md5, hash_sha1, ...; or a list of any of '
|
||||
'them, comma-separated')
|
||||
parser.add_option('-t', '--type', dest = 'filetype', default = 'autodetect',
|
||||
help = 'the suggested file type: movie, episode or autodetect')
|
||||
parser.add_option('-a', '--advanced', dest = 'advanced', action='store_true', default = False,
|
||||
help = 'display advanced information for filename guesses, as json output')
|
||||
parser.add_option('-d', '--demo', action='store_true', dest='demo', default=False,
|
||||
help = 'run a few builtin tests instead of analyzing a file')
|
||||
|
||||
# see http://bugs.python.org/issue2128
|
||||
if os.name == 'nt':
|
||||
for i, a in enumerate(sys.argv):
|
||||
sys.argv[i] = a.decode(locale.getpreferredencoding())
|
||||
|
||||
# see https://github.com/wackou/guessit/issues/43
|
||||
# and http://stackoverflow.com/questions/4545661/unicodedecodeerror-when-redirecting-to-file
|
||||
# Wrap sys.stdout into a StreamWriter to allow writing unicode.
|
||||
sys.stdout = codecs.getwriter(locale.getpreferredencoding())(sys.stdout)
|
||||
|
||||
if args:
|
||||
options, args = option_parser.parse_args(args)
|
||||
else: # pragma: no cover
|
||||
options, args = option_parser.parse_args()
|
||||
options, args = parser.parse_args()
|
||||
if options.verbose:
|
||||
logging.getLogger().setLevel(logging.DEBUG)
|
||||
logging.getLogger('guessit').setLevel(logging.DEBUG)
|
||||
|
||||
help_required = True
|
||||
if options.properties or options.values:
|
||||
display_properties(options.values, options.transformers)
|
||||
help_required = False
|
||||
elif options.transformers:
|
||||
display_transformers()
|
||||
help_required = False
|
||||
if options.demo:
|
||||
run_demo(episodes=True, movies=True, options=vars(options))
|
||||
help_required = False
|
||||
run_demo(episodes=True, movies=True, advanced=options.advanced)
|
||||
else:
|
||||
if args:
|
||||
help_required = False
|
||||
for filename in args:
|
||||
guess_file(filename,
|
||||
info=options.info.split(','),
|
||||
options=vars(options)
|
||||
)
|
||||
detect_filename(filename,
|
||||
filetype = options.filetype,
|
||||
info = options.info.split(','),
|
||||
advanced = options.advanced)
|
||||
|
||||
if help_required: # pragma: no cover
|
||||
option_parser.print_help()
|
||||
else:
|
||||
parser.print_help()
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
@ -1,20 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
__version__ = '0.7.1'
@ -1,615 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from .patterns import compile_pattern, sep
|
||||
from . import base_text_type
|
||||
from .guess import Guess
|
||||
import types
|
||||
|
||||
|
||||
def _get_span(prop, match):
|
||||
"""Retrieves span for a match"""
|
||||
if not prop.global_span and match.re.groups:
|
||||
start = None
|
||||
end = None
|
||||
for i in range(1, match.re.groups + 1):
|
||||
span = match.span(i)
|
||||
if start is None or span[0] < start:
|
||||
start = span[0]
|
||||
if end is None or span[1] > end:
|
||||
end = span[1]
|
||||
return (start, end)
|
||||
else:
|
||||
return match.span()
|
||||
|
||||
|
||||
def _get_groups(compiled_re):
|
||||
"""
|
||||
Retrieves groups from re
|
||||
|
||||
:return: list of group names
|
||||
"""
|
||||
if compiled_re.groups:
|
||||
indexgroup = {}
|
||||
for k, i in compiled_re.groupindex.items():
|
||||
indexgroup[i] = k
|
||||
ret = []
|
||||
for i in range(1, compiled_re.groups + 1):
|
||||
ret.append(indexgroup.get(i, i))
|
||||
return ret
|
||||
else:
|
||||
return [None]
|
||||
|
||||
|
||||
class NoValidator(object):
|
||||
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||
return True
|
||||
|
||||
|
||||
class DefaultValidator(object):
|
||||
"""Make sure our match is surrounded by separators, or by another entry"""
|
||||
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||
start, end = _get_span(prop, match)
|
||||
|
||||
sep_start = start <= 0 or string[start - 1] in sep
|
||||
sep_end = end >= len(string) or string[end] in sep
|
||||
start_by_other = start in entry_end
|
||||
end_by_other = end in entry_start
|
||||
if (sep_start or start_by_other) and (sep_end or end_by_other):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
class WeakValidator(DefaultValidator):
|
||||
"""Make sure our match is surrounded by separators and is the first or last element in the string"""
|
||||
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||
if super(WeakValidator, self).validate(prop, string, node, match, entry_start, entry_end):
|
||||
span = match.span()
|
||||
start = span[0]
|
||||
end = span[1]
|
||||
|
||||
at_start = True
|
||||
at_end = True
|
||||
|
||||
while start > 0:
|
||||
start = start - 1
|
||||
if string[start] not in sep:
|
||||
at_start = False
|
||||
break
|
||||
if at_start:
|
||||
return True
|
||||
while end < len(string) - 1:
|
||||
end = end + 1
|
||||
if string[end] not in sep:
|
||||
at_end = False
|
||||
break
|
||||
if at_end:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
class LeavesValidator(DefaultValidator):
|
||||
def __init__(self, lambdas=None, previous_lambdas=None, next_lambdas=None, both_side=False, default_=True):
|
||||
self.previous_lambdas = previous_lambdas if not previous_lambdas is None else []
|
||||
self.next_lambdas = next_lambdas if not next_lambdas is None else []
|
||||
if lambdas:
|
||||
self.previous_lambdas.extend(lambdas)
|
||||
self.next_lambdas.extend(lambdas)
|
||||
self.both_side = both_side
|
||||
self.default_ = default_
|
||||
|
||||
"""Make sure our match is surrounded by separators and validates defined lambdas"""
|
||||
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||
if self.default_:
|
||||
super_ret = super(LeavesValidator, self).validate(prop, string, node, match, entry_start, entry_end)
|
||||
else:
|
||||
super_ret = True
|
||||
if not super_ret:
|
||||
return False
|
||||
|
||||
previous_ = self._validate_previous(prop, string, node, match, entry_start, entry_end)
|
||||
if previous_ and self.both_side:
|
||||
return previous_
|
||||
next_ = self._validate_next(prop, string, node, match, entry_start, entry_end)
|
||||
|
||||
if previous_ is None and next_ is None:
|
||||
return super_ret
|
||||
|
||||
if self.both_side:
|
||||
return previous_ and next_
|
||||
else:
|
||||
return previous_ or next_
|
||||
|
||||
def _validate_previous(self, prop, string, node, match, entry_start, entry_end):
|
||||
if self.previous_lambdas:
|
||||
for leaf in node.root.previous_leaves(node):
|
||||
for lambda_ in self.previous_lambdas:
|
||||
ret = self._check_rule(lambda_, leaf)
|
||||
if not ret is None:
|
||||
return ret
|
||||
return False
|
||||
|
||||
def _validate_next(self, prop, string, node, match, entry_start, entry_end):
|
||||
if self.next_lambdas:
|
||||
for leaf in node.root.next_leaves(node):
|
||||
for lambda_ in self.next_lambdas:
|
||||
ret = self._check_rule(lambda_, leaf)
|
||||
if not ret is None:
|
||||
return ret
|
||||
return False
|
||||
|
||||
def _check_rule(self, lambda_, previous_leaf):
|
||||
return lambda_(previous_leaf)
|
||||
|
||||
|
||||
class _Property:
|
||||
"""Represents a property configuration."""
|
||||
def __init__(self, keys=None, pattern=None, canonical_form=None, canonical_from_pattern=True, confidence=1.0, enhance=True, global_span=False, validator=DefaultValidator(), formatter=None):
|
||||
"""
|
||||
:param keys: Keys of the property (format, screenSize, ...)
|
||||
:type keys: string
|
||||
:param canonical_form: Unique value of the property (DVD, 720p, ...)
|
||||
:type canonical_form: string
|
||||
:param pattern: Regexp pattern
|
||||
:type pattern: string
|
||||
:param confidence: confidence
|
||||
:type confidence: float
|
||||
:param enhance: enhance the pattern
|
||||
:type enhance: boolean
|
||||
:param global_span: if True, the whole match span will be used to create the Guess.
|
||||
Else, the span from the capturing groups will be used.
|
||||
:type global_span: boolean
|
||||
:param validator: Validator to use
|
||||
:type validator: :class:`DefaultValidator`
|
||||
:param formatter: Formatter to use
|
||||
:type formatter: function
|
||||
"""
|
||||
if isinstance(keys, list):
|
||||
self.keys = keys
|
||||
elif isinstance(keys, base_text_type):
|
||||
self.keys = [keys]
|
||||
else:
|
||||
self.keys = []
|
||||
self.canonical_form = canonical_form
|
||||
if not pattern is None:
|
||||
self.pattern = pattern
|
||||
else:
|
||||
self.pattern = canonical_form
|
||||
if self.canonical_form is None and canonical_from_pattern:
|
||||
self.canonical_form = self.pattern
|
||||
self.compiled = compile_pattern(self.pattern, enhance=enhance)
|
||||
for group_name in _get_groups(self.compiled):
|
||||
if isinstance(group_name, base_text_type) and not group_name in self.keys:
|
||||
self.keys.append(group_name)
|
||||
if not self.keys:
|
||||
raise ValueError("No property key is defined")
|
||||
self.confidence = confidence
|
||||
self.global_span = global_span
|
||||
self.validator = validator
|
||||
self.formatter = formatter
|
||||
|
||||
def format(self, value, group_name=None):
|
||||
"""Retrieves the final value from re group match value"""
|
||||
formatter = None
|
||||
if isinstance(self.formatter, dict):
|
||||
formatter = self.formatter.get(group_name)
|
||||
if formatter is None and not group_name is None:
|
||||
formatter = self.formatter.get(None)
|
||||
else:
|
||||
formatter = self.formatter
|
||||
if isinstance(formatter, types.FunctionType):
|
||||
return formatter(value)
|
||||
elif not formatter is None:
|
||||
return formatter.format(value)
|
||||
return value
|
||||
|
||||
def __repr__(self):
|
||||
return "%s: %s" % (self.keys, self.canonical_form if self.canonical_form else self.pattern)
|
||||
|
||||
|
||||
class PropertiesContainer(object):
|
||||
def __init__(self, **kwargs):
|
||||
self._properties = []
|
||||
self.default_property_kwargs = kwargs
|
||||
|
||||
def unregister_property(self, name, *canonical_forms):
|
||||
"""Unregister a property canonical forms
|
||||
|
||||
If canonical_forms are specified, only those values will be unregistered
|
||||
|
||||
:param name: Property name to unregister
|
||||
:type name: string
|
||||
:param canonical_forms: Values to unregister
|
||||
:type canonical_forms: varargs of string
|
||||
"""
|
||||
_properties = [prop for prop in self._properties if prop.name == name and (not canonical_forms or prop.canonical_form in canonical_forms)]
|
||||
|
||||
def register_property(self, name, *patterns, **property_params):
|
||||
"""Register property with defined canonical form and patterns.
|
||||
|
||||
:param name: name of the property (format, screenSize, ...)
|
||||
:type name: string
|
||||
:param patterns: regular expression patterns to register for the property canonical_form
|
||||
:type patterns: varargs of string
|
||||
"""
|
||||
properties = []
|
||||
for pattern in patterns:
|
||||
params = dict(self.default_property_kwargs)
|
||||
params.update(property_params)
|
||||
if isinstance(pattern, dict):
|
||||
params.update(pattern)
|
||||
prop = _Property(name, **params)
|
||||
else:
|
||||
prop = _Property(name, pattern, **params)
|
||||
self._properties.append(prop)
|
||||
properties.append(prop)
|
||||
return properties
|
||||
|
||||
def register_canonical_properties(self, name, *canonical_forms, **property_params):
|
||||
"""Register properties from their canonical forms.
|
||||
|
||||
:param name: name of the property (releaseGroup, ...)
|
||||
:type name: string
|
||||
:param canonical_forms: values of the property ('ESiR', 'WAF', 'SEPTiC', ...)
|
||||
:type canonical_forms: varargs of strings
|
||||
"""
|
||||
properties = []
|
||||
for canonical_form in canonical_forms:
|
||||
params = dict(property_params)
|
||||
params['canonical_form'] = canonical_form
|
||||
properties.extend(self.register_property(name, canonical_form, **params))
|
||||
return properties
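A brief, hedged sketch of the registration API above (the property names and patterns are only examples; 'ESiR', 'WAF' and 'SEPTiC' are borrowed from the docstring):

    container = PropertiesContainer()
    # one property key registered with several regexp patterns
    container.register_property('format', 'HDTV', 'DVD')
    # several canonical values registered for one property in a single call
    container.register_canonical_properties('releaseGroup', 'ESiR', 'WAF', 'SEPTiC')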
|
||||
|
||||
def unregister_all_properties(self):
|
||||
"""Unregister all defined properties"""
|
||||
self._properties.clear()
|
||||
|
||||
def find_properties(self, string, node, name=None, validate=True, re_match=False, sort=True, multiple=False):
|
||||
"""Find all distinct properties for given string
|
||||
|
||||
If no capturing group is defined in the property, value will be grabbed from the entire match.
|
||||
|
||||
If one or more unnamed capturing groups are defined in the property, the first capturing group will be used.
|
||||
|
||||
If named capturing groups are defined in the property, they will be returned as property keys.
|
||||
|
||||
If validate, found properties will be validated by their defined validator
|
||||
|
||||
If re_match, re.match will be used instead of re.search.
|
||||
|
||||
If sort, found properties will be sorted from longest match to shortest match.
|
||||
|
||||
If multiple is False and multiple values are found for the same property, the most confident one will be returned.
|
||||
|
||||
If multiple is False and multiple values are found for the same property with the same confidence, the longest one will be returned.
|
||||
|
||||
:param string: input string
|
||||
:type string: string
|
||||
|
||||
:param node: current node of the matching tree
|
||||
:type node: :class:`guessit.matchtree.MatchTree`
|
||||
|
||||
:param name: name of property to find
|
||||
:type name: string
|
||||
|
||||
:param re_match: use re.match instead of re.search
|
||||
:type re_match: bool
|
||||
|
||||
:param multiple: Allows multiple property values to be returned
|
||||
:type multiple: bool
|
||||
|
||||
:return: found properties
|
||||
:rtype: list of tuples (:class:`_Property`, match, list of tuples (property_name, tuple(value_start, value_end)))
|
||||
|
||||
:see: `_Property`
|
||||
:see: `register_property`
|
||||
:see: `register_canonical_properties`
|
||||
"""
|
||||
entry_start = {}
|
||||
entry_end = {}
|
||||
|
||||
entries = []
|
||||
|
||||
ret = []
|
||||
|
||||
if not string.strip():
|
||||
return ret
|
||||
|
||||
# search all properties
|
||||
for prop in self.get_properties(name):
|
||||
match = prop.compiled.match(string) if re_match else prop.compiled.search(string)
|
||||
if match:
|
||||
entry = prop, match
|
||||
entries.append(entry)
|
||||
|
||||
if validate:
|
||||
# compute entries start and ends
|
||||
for prop, match in entries:
|
||||
start, end = _get_span(prop, match)
|
||||
|
||||
if start not in entry_start:
|
||||
entry_start[start] = [prop]
|
||||
else:
|
||||
entry_start[start].append(prop)
|
||||
|
||||
if end not in entry_end:
|
||||
entry_end[end] = [prop]
|
||||
else:
|
||||
entry_end[end].append(prop)
|
||||
|
||||
# remove invalid values
|
||||
while True:
|
||||
invalid_entries = []
|
||||
for entry in entries:
|
||||
prop, match = entry
|
||||
if not prop.validator.validate(prop, string, node, match, entry_start, entry_end):
|
||||
invalid_entries.append(entry)
|
||||
if not invalid_entries:
|
||||
break
|
||||
for entry in invalid_entries:
|
||||
prop, match = entry
|
||||
entries.remove(entry)
|
||||
invalid_span = _get_span(prop, match)
|
||||
start = invalid_span[0]
|
||||
end = invalid_span[1]
|
||||
entry_start[start].remove(prop)
|
||||
if not entry_start.get(start):
|
||||
del entry_start[start]
|
||||
entry_end[end].remove(prop)
|
||||
if not entry_end.get(end):
|
||||
del entry_end[end]
|
||||
|
||||
if multiple:
|
||||
ret = entries
|
||||
else:
|
||||
# keep only the best match if multiple values were found
|
||||
entries_dict = {}
|
||||
for entry in entries:
|
||||
prop, match = entry  # use this entry's property, not the leftover loop variable from the search above
for key in prop.keys:
|
||||
if not key in entries_dict:
|
||||
entries_dict[key] = []
|
||||
entries_dict[key].append(entry)
|
||||
|
||||
for entries in entries_dict.values():
|
||||
if multiple:
|
||||
for entry in entries:
|
||||
ret.append(entry)
|
||||
else:
|
||||
best_ret = {}
|
||||
|
||||
best_prop, best_match = None, None
|
||||
if len(entries) == 1:
|
||||
best_prop, best_match = entries[0]
|
||||
else:
|
||||
for prop, match in entries:
|
||||
start, end = _get_span(prop, match)
|
||||
if not best_prop or \
|
||||
best_prop.confidence < prop.confidence or \
|
||||
best_prop.confidence == prop.confidence and \
|
||||
best_match.span()[1] - best_match.span()[0] < match.span()[1] - match.span()[0]:
|
||||
best_prop, best_match = prop, match
|
||||
|
||||
best_ret[best_prop] = best_match
|
||||
|
||||
for prop, match in best_ret.items():
|
||||
ret.append((prop, match))
|
||||
|
||||
if sort:
|
||||
def _sorting(x):
|
||||
_, x_match = x
|
||||
x_start, x_end = x_match.span()
|
||||
return (x_start - x_end)
|
||||
|
||||
ret.sort(key=_sorting)
|
||||
|
||||
return ret
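Continuing that illustrative container, a hedged sketch of how find_properties and as_guess fit together (the filename is made up, and node is passed as None here only because the default validator does not inspect it):

    filename = 'Movie.Name.2008.HDTV.x264.avi'
    found = container.find_properties(filename, None, name='format')
    guess = container.as_guess(found, input=filename)
    # guess would then carry {'format': 'HDTV'} with the property's confidence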
|
||||
|
||||
def as_guess(self, found_properties, input=None, filter=None, sep_replacement=None, multiple=False, *args, **kwargs):
|
||||
if filter is None:
|
||||
filter = lambda property, *args, **kwargs: True
|
||||
guesses = [] if multiple else None
|
||||
for property in found_properties:
|
||||
prop, match = property
|
||||
first_key = None
|
||||
for key in prop.keys:
|
||||
# First property key will be used as base for effective name
|
||||
if isinstance(key, base_text_type):
|
||||
if first_key is None:
|
||||
first_key = key
|
||||
break
|
||||
property_name = first_key if first_key else None
|
||||
span = _get_span(prop, match)
|
||||
guess = Guess(confidence=prop.confidence, input=input, span=span, prop=property_name)
|
||||
groups = _get_groups(match.re)
|
||||
for group_name in groups:
|
||||
name = group_name if isinstance(group_name, base_text_type) else property_name if property_name not in groups else None
|
||||
if name:
|
||||
value = self._effective_prop_value(prop, group_name, input, match.span(group_name) if group_name else match.span(), sep_replacement)
|
||||
if not value is None:
|
||||
is_string = isinstance(value, base_text_type)
|
||||
if not is_string or is_string and value: # Keep non empty strings and other defined objects
|
||||
if isinstance(value, dict):
|
||||
for k, v in value.items():
|
||||
if k is None:
|
||||
k = name
|
||||
guess[k] = v
|
||||
else:
|
||||
guess[name] = value
|
||||
if group_name:
|
||||
guess.metadata(prop).span = match.span(group_name)
|
||||
if filter(guess):
|
||||
if multiple:
|
||||
guesses.append(guess)
|
||||
else:
|
||||
return guess
|
||||
return guesses
|
||||
|
||||
def _effective_prop_value(self, prop, group_name, input=None, span=None, sep_replacement=None):
|
||||
if prop.canonical_form:
|
||||
return prop.canonical_form
|
||||
if input is None:
|
||||
return None
|
||||
value = input
|
||||
if not span is None:
|
||||
value = value[span[0]:span[1]]
|
||||
value = input[span[0]:span[1]] if input else None
|
||||
if sep_replacement:
|
||||
for sep_char in sep:
|
||||
value = value.replace(sep_char, sep_replacement)
|
||||
if value:
|
||||
value = prop.format(value, group_name)
|
||||
return value
|
||||
|
||||
def get_properties(self, name=None, canonical_form=None):
|
||||
"""Retrieve properties
|
||||
|
||||
:return: Properties
|
||||
:rtype: generator
|
||||
"""
|
||||
for prop in self._properties:
|
||||
if (name is None or name in prop.keys) and (canonical_form is None or prop.canonical_form == canonical_form):
|
||||
yield prop
|
||||
|
||||
def get_supported_properties(self):
|
||||
supported_properties = {}
|
||||
for prop in self.get_properties():
|
||||
for k in prop.keys:
|
||||
values = supported_properties.get(k)
|
||||
if not values:
|
||||
values = set()
|
||||
supported_properties[k] = values
|
||||
if prop.canonical_form:
|
||||
values.add(prop.canonical_form)
|
||||
return supported_properties
|
||||
|
||||
|
||||
class QualitiesContainer():
|
||||
def __init__(self):
|
||||
self._qualities = {}
|
||||
|
||||
def register_quality(self, name, canonical_form, rating):
|
||||
"""Register a quality rating.
|
||||
|
||||
:param name: Name of the property
|
||||
:type name: string
|
||||
:param canonical_form: Value of the property
|
||||
:type canonical_form: string
|
||||
:param rating: Estimated quality rating for the property
|
||||
:type rating: int
|
||||
"""
|
||||
property_qualities = self._qualities.get(name)
|
||||
|
||||
if property_qualities is None:
|
||||
property_qualities = {}
|
||||
self._qualities[name] = property_qualities
|
||||
|
||||
property_qualities[canonical_form] = rating
|
||||
|
||||
def unregister_quality(self, name, *canonical_forms):
|
||||
"""Unregister quality ratings for given property name.
|
||||
|
||||
If canonical_forms are specified, only those values will be unregistered
|
||||
|
||||
:param name: Name of the property
|
||||
:type name: string
|
||||
:param canonical_forms: Value of the property
|
||||
:type canonical_forms: string
|
||||
"""
|
||||
if not canonical_forms:
|
||||
if name in self._qualities:
|
||||
del self._qualities[name]
|
||||
else:
|
||||
property_qualities = self._qualities.get(name)
|
||||
if not property_qualities is None:
|
||||
for property_canonical_form in canonical_forms:
|
||||
if property_canonical_form in property_qualities:
|
||||
del property_qualities[property_canonical_form]
|
||||
if not property_qualities:
|
||||
del self._qualities[name]
|
||||
|
||||
def clear_qualities(self,):
|
||||
"""Unregister all defined quality ratings.
|
||||
"""
|
||||
self._qualities.clear()
|
||||
|
||||
def rate_quality(self, guess, *props):
|
||||
"""Rate the quality of guess.
|
||||
|
||||
:param guess: Guess to rate
|
||||
:type guess: :class:`guessit.guess.Guess`
|
||||
:param props: Properties to include in the rating. if empty, rating will be performed for all guess properties.
|
||||
:type props: varargs of string
|
||||
|
||||
:return: Quality of the guess. The higher, the better.
|
||||
:rtype: int
|
||||
"""
|
||||
rate = 0
|
||||
if not props:
|
||||
props = guess.keys()
|
||||
for prop in props:
|
||||
prop_value = guess.get(prop)
|
||||
prop_qualities = self._qualities.get(prop)
|
||||
if not prop_value is None and not prop_qualities is None:
|
||||
rate += prop_qualities.get(prop_value, 0)
|
||||
return rate
|
||||
|
||||
def best_quality_properties(self, props, *guesses):
|
||||
"""Retrieve the best quality guess, based on given properties
|
||||
|
||||
:param props: Properties to include in the rating
|
||||
:type props: list of strings
|
||||
:param guesses: Guesses to rate
|
||||
:type guesses: :class:`guessit.guess.Guess`
|
||||
|
||||
:return: Best quality guess from all passed guesses
|
||||
:rtype: :class:`guessit.guess.Guess`
|
||||
"""
|
||||
best_guess = None
|
||||
best_rate = None
|
||||
for guess in guesses:
|
||||
rate = self.rate_quality(guess, *props)
|
||||
if best_rate is None or best_rate < rate:
|
||||
best_rate = rate
|
||||
best_guess = guess
|
||||
return best_guess
|
||||
|
||||
def best_quality(self, *guesses):
|
||||
"""Retrieve the best quality guess.
|
||||
|
||||
:param guesses: Guesses to rate
|
||||
:type guesses: :class:`guessit.guess.Guess`
|
||||
|
||||
:return: Best quality guess from all passed guesses
|
||||
:rtype: :class:`guessit.guess.Guess`
|
||||
"""
|
||||
best_guess = None
|
||||
best_rate = None
|
||||
for guess in guesses:
|
||||
rate = self.rate_quality(guess)
|
||||
if best_rate is None or best_rate < rate:
|
||||
best_rate = rate
|
||||
best_guess = guess
|
||||
return best_guess
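A small, hedged example of the quality-rating API above (the ratings are arbitrary):

    qualities = QualitiesContainer()
    qualities.register_quality('format', 'DVD', 5)
    qualities.register_quality('format', 'BluRay', 10)
    dvd = Guess({'format': 'DVD'}, confidence=1.0)
    bluray = Guess({'format': 'BluRay'}, confidence=1.0)
    qualities.rate_quality(dvd)            # -> 5
    qualities.best_quality(dvd, bluray)    # -> the BluRay guess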
@ -2,7 +2,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
|
@ -18,13 +18,12 @@
|
|||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit import UnicodeMixin, base_text_type, u
|
||||
from guessit.fileutils import load_file_in_same_dir
|
||||
import logging
|
||||
|
||||
__all__ = ['Country']
|
||||
__all__ = [ 'Country' ]
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
@ -37,12 +36,12 @@ log = logging.getLogger(__name__)
|
|||
# are all separated by pipe (|) characters."
|
||||
_iso3166_contents = load_file_in_same_dir(__file__, 'ISO-3166-1_utf8.txt')
|
||||
|
||||
country_matrix = [l.strip().split('|')
|
||||
for l in _iso3166_contents.strip().split('\n')]
|
||||
country_matrix = [ l.strip().split('|')
|
||||
for l in _iso3166_contents.strip().split('\n') ]
|
||||
|
||||
country_matrix += [['Unknown', 'un', 'unk', '', ''],
|
||||
['Latin America', '', 'lat', '', '']
|
||||
]
|
||||
country_matrix += [ [ 'Unknown', 'un', 'unk', '', '' ],
|
||||
[ 'Latin America', '', 'lat', '', '' ]
|
||||
]
|
||||
|
||||
country_to_alpha3 = dict((c[0].lower(), c[2].lower()) for c in country_matrix)
|
||||
country_to_alpha3.update(dict((c[1].lower(), c[2].lower()) for c in country_matrix))
|
||||
|
@ -50,16 +49,17 @@ country_to_alpha3.update(dict((c[2].lower(), c[2].lower()) for c in country_matr
|
|||
|
||||
# add here exceptions / non ISO representations
|
||||
# Note: remember to put those exceptions in lower-case, they won't work otherwise
|
||||
country_to_alpha3.update({'latinoamérica': 'lat',
|
||||
'brazilian': 'bra',
|
||||
'españa': 'esp',
|
||||
'uk': 'gbr'
|
||||
})
|
||||
country_to_alpha3.update({ 'latinoamérica': 'lat',
|
||||
'brazilian': 'bra',
|
||||
'españa': 'esp',
|
||||
'uk': 'gbr'
|
||||
})
|
||||
|
||||
country_alpha3_to_en_name = dict((c[2].lower(), c[0]) for c in country_matrix)
|
||||
country_alpha3_to_alpha2 = dict((c[2].lower(), c[1].lower()) for c in country_matrix)
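# For example (values depend on the bundled ISO-3166-1 file; 'uk' comes from the
# exceptions added above): country_to_alpha3['uk'] -> 'gbr',
# country_alpha3_to_alpha2['gbr'] -> 'gb', and country_alpha3_to_en_name maps
# 'gbr' back to the English country name listed in that file.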
|
||||
|
||||
|
||||
|
||||
class Country(UnicodeMixin):
|
||||
"""This class represents a country.
|
||||
|
||||
|
@ -78,6 +78,7 @@ class Country(UnicodeMixin):
|
|||
if self.alpha3 is None:
|
||||
self.alpha3 = 'unk'
|
||||
|
||||
|
||||
@property
|
||||
def alpha2(self):
|
||||
return country_alpha3_to_alpha2[self.alpha3]
@ -2,7 +2,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
|
@ -18,55 +18,15 @@
|
|||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from __future__ import unicode_literals
|
||||
import datetime
|
||||
import re
|
||||
import math
|
||||
|
||||
|
||||
_dsep = r'[-/ \.]'
|
||||
_date_rexps = [re.compile(
|
||||
# 20010823
|
||||
r'[^0-9]' +
|
||||
r'(?P<year>[0-9]{4})' +
|
||||
r'(?P<month>[0-9]{2})' +
|
||||
r'(?P<day>[0-9]{2})' +
|
||||
r'[^0-9]'),
|
||||
|
||||
# 2001-08-23
|
||||
re.compile(r'[^0-9]' +
|
||||
r'(?P<year>[0-9]{4})' + _dsep +
|
||||
r'(?P<month>[0-9]{2})' + _dsep +
|
||||
r'(?P<day>[0-9]{2})' +
|
||||
r'[^0-9]'),
|
||||
|
||||
# 23-08-2001
|
||||
re.compile(r'[^0-9]' +
|
||||
r'(?P<day>[0-9]{2})' + _dsep +
|
||||
r'(?P<month>[0-9]{2})' + _dsep +
|
||||
r'(?P<year>[0-9]{4})' +
|
||||
r'[^0-9]'),
|
||||
|
||||
# 23-08-01
|
||||
re.compile(r'[^0-9]' +
|
||||
r'(?P<day>[0-9]{2})' + _dsep +
|
||||
r'(?P<month>[0-9]{2})' + _dsep +
|
||||
r'(?P<year>[0-9]{2})' +
|
||||
r'[^0-9]'),
|
||||
]
|
||||
|
||||
|
||||
def valid_year(year, today=None):
|
||||
"""Check if number is a valid year"""
|
||||
if not today:
|
||||
today = datetime.date.today()
|
||||
return 1920 < year < today.year + 5
|
||||
|
||||
def valid_year(year):
|
||||
return 1920 < year < datetime.date.today().year + 5
|
||||
|
||||
def search_year(string):
|
||||
"""Looks for year patterns, and if found return the year and group span.
|
||||
|
||||
Assumes there are sentinels at the beginning and end of the string that
|
||||
always allow matching a non-digit delimiting the date.
|
||||
|
||||
|
@ -74,10 +34,10 @@ def search_year(string):
|
|||
and now + 5 years, so for instance 2000 would be returned as a valid
|
||||
year but 1492 would not.
|
||||
|
||||
>>> search_year(' in the year 2000... ')
|
||||
(2000, (13, 17))
|
||||
>>> search_year('in the year 2000...')
|
||||
(2000, (12, 16))
|
||||
|
||||
>>> search_year(' they arrived in 1492. ')
|
||||
>>> search_year('they arrived in 1492.')
|
||||
(None, None)
|
||||
"""
|
||||
match = re.search(r'[^0-9]([0-9]{4})[^0-9]', string)
|
||||
|
@ -91,32 +51,59 @@ def search_year(string):
|
|||
|
||||
def search_date(string):
|
||||
"""Looks for date patterns, and if found return the date and group span.
|
||||
|
||||
Assumes there are sentinels at the beginning and end of the string that
|
||||
always allow matching a non-digit delimiting the date.
|
||||
|
||||
Year can be defined on two digit only. It will return the nearest possible
|
||||
date from today.
|
||||
>>> search_date('This happened on 2002-04-22.')
|
||||
(datetime.date(2002, 4, 22), (17, 27))
|
||||
|
||||
>>> search_date(' This happened on 2002-04-22. ')
|
||||
(datetime.date(2002, 4, 22), (18, 28))
|
||||
>>> search_date('And this on 17-06-1998.')
|
||||
(datetime.date(1998, 6, 17), (12, 22))
|
||||
|
||||
>>> search_date(' And this on 17-06-1998. ')
|
||||
(datetime.date(1998, 6, 17), (13, 23))
|
||||
|
||||
>>> search_date(' no date in here ')
|
||||
>>> search_date('no date in here')
|
||||
(None, None)
|
||||
"""
|
||||
|
||||
today = datetime.date.today()
|
||||
for drexp in _date_rexps:
|
||||
dsep = r'[-/ \.]'
|
||||
|
||||
date_rexps = [
|
||||
# 20010823
|
||||
r'[^0-9]' +
|
||||
r'(?P<year>[0-9]{4})' +
|
||||
r'(?P<month>[0-9]{2})' +
|
||||
r'(?P<day>[0-9]{2})' +
|
||||
r'[^0-9]',
|
||||
|
||||
# 2001-08-23
|
||||
r'[^0-9]' +
|
||||
r'(?P<year>[0-9]{4})' + dsep +
|
||||
r'(?P<month>[0-9]{2})' + dsep +
|
||||
r'(?P<day>[0-9]{2})' +
|
||||
r'[^0-9]',
|
||||
|
||||
# 23-08-2001
|
||||
r'[^0-9]' +
|
||||
r'(?P<day>[0-9]{2})' + dsep +
|
||||
r'(?P<month>[0-9]{2})' + dsep +
|
||||
r'(?P<year>[0-9]{4})' +
|
||||
r'[^0-9]',
|
||||
|
||||
# 23-08-01
|
||||
r'[^0-9]' +
|
||||
r'(?P<day>[0-9]{2})' + dsep +
|
||||
r'(?P<month>[0-9]{2})' + dsep +
|
||||
r'(?P<year>[0-9]{2})' +
|
||||
r'[^0-9]',
|
||||
]
|
||||
|
||||
for drexp in date_rexps:
|
||||
match = re.search(drexp, string)
|
||||
if match:
|
||||
d = match.groupdict()
|
||||
year, month, day = int(d['year']), int(d['month']), int(d['day'])
|
||||
# years specified as 2 digits should be adjusted here
|
||||
if year < 100:
|
||||
if year > (today.year % 100) + 5:
|
||||
if year > (datetime.date.today().year % 100) + 5:
|
||||
year = 1900 + year
|
||||
else:
|
||||
year = 2000 + year
|
||||
|
@ -134,7 +121,7 @@ def search_date(string):
|
|||
continue
|
||||
|
||||
# check date plausibility
|
||||
if not valid_year(date.year, today=today):
|
||||
if not 1900 < date.year < datetime.date.today().year + 5:
|
||||
continue
|
||||
|
||||
# looks like we have a valid date
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
|
@ -18,8 +18,7 @@
|
|||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit import s, u
|
||||
import os.path
|
||||
import zipfile
|
||||
|
@ -45,13 +44,17 @@ def split_path(path):
|
|||
result = []
|
||||
while True:
|
||||
head, tail = os.path.split(path)
|
||||
headlen = len(head)
|
||||
|
||||
if not head and not tail:
|
||||
return result
|
||||
# on Unix systems, the root folder is '/'
|
||||
if head and head == '/'*headlen and tail == '':
|
||||
return ['/'] + result
|
||||
|
||||
if not tail and head == path:
|
||||
# Make sure we won't have an infinite loop.
|
||||
result = [head] + result
|
||||
# on Windows, the root folder is a drive letter (eg: 'C:\') or for shares \\
|
||||
if ((headlen == 3 and head[1:] == ':\\') or (headlen == 2 and head == '\\\\')) and tail == '':
|
||||
return [head] + result
|
||||
|
||||
if head == '' and tail == '':
|
||||
return result
|
||||
|
||||
# we just split a directory ending with '/', so tail is empty
|
||||
|
@ -67,8 +70,8 @@ def split_path(path):
|
|||
def file_in_same_dir(ref_file, desired_file):
|
||||
"""Return the path for a file in the same dir as a given reference file.
|
||||
|
||||
>>> s(file_in_same_dir('~/smewt/smewt.db', 'smewt.settings')) == os.path.normpath('~/smewt/smewt.settings')
|
||||
True
|
||||
>>> s(file_in_same_dir('~/smewt/smewt.db', 'smewt.settings'))
|
||||
'~/smewt/smewt.settings'
|
||||
|
||||
"""
|
||||
return os.path.join(*(split_path(ref_file)[:-1] + [desired_file]))
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
|
@ -18,9 +18,10 @@
|
|||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit import UnicodeMixin, s, u, base_text_type
|
||||
from guessit.language import Language
|
||||
from guessit.country import Country
|
||||
import json
|
||||
import datetime
|
||||
import logging
|
||||
|
@ -28,103 +29,6 @@ import logging
|
|||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class GuessMetadata(object):
|
||||
"""GuessMetadata contains confidence, an input string, span and related property.
|
||||
|
||||
If defined on a property of Guess object, it overrides the object defined as global.
|
||||
|
||||
:param parent: The parent metadata, used for undefined properties in self object
|
||||
:type parent: :class: `GuessMedata`
|
||||
:param confidence: The confidence (from 0.0 to 1.0)
|
||||
:type confidence: number
|
||||
:param input: The input string
|
||||
:type input: string
|
||||
:param span: The input string
|
||||
:type span: tuple (int, int)
|
||||
:param prop: The found property definition
|
||||
:type prop: :class `guessit.containers._Property`
|
||||
"""
|
||||
def __init__(self, parent=None, confidence=None, input=None, span=None, prop=None, *args, **kwargs):
|
||||
self.parent = parent
|
||||
if confidence is None and self.parent is None:
|
||||
self._confidence = 1.0
|
||||
else:
|
||||
self._confidence = confidence
|
||||
self._input = input
|
||||
self._span = span
|
||||
self._prop = prop
|
||||
|
||||
@property
|
||||
def confidence(self):
|
||||
"""The confidence
|
||||
|
||||
:rtype: int
|
||||
:return: confidence value
|
||||
"""
|
||||
return self._confidence if not self._confidence is None else self.parent.confidence if self.parent else None
|
||||
|
||||
@confidence.setter
|
||||
def confidence(self, confidence):
|
||||
self._confidence = confidence
|
||||
|
||||
@property
|
||||
def input(self):
|
||||
"""The input
|
||||
|
||||
:rtype: string
|
||||
:return: String used to find this guess value
|
||||
"""
|
||||
return self._input if not self._input is None else self.parent.input if self.parent else None
|
||||
|
||||
@property
|
||||
def span(self):
|
||||
"""The span
|
||||
|
||||
:rtype: tuple (int, int)
|
||||
:return: span of input string used to find this guess value
|
||||
"""
|
||||
return self._span if not self._span is None else self.parent.span if self.parent else None
|
||||
|
||||
@span.setter
|
||||
def span(self, span):
|
||||
"""The span
|
||||
|
||||
:rtype: tuple (int, int)
|
||||
:return: span of input string used to find this guess value
|
||||
"""
|
||||
self._span = span
|
||||
|
||||
@property
|
||||
def prop(self):
|
||||
"""The property
|
||||
|
||||
:rtype: :class:`_Property`
|
||||
:return: The property
|
||||
"""
|
||||
return self._prop if not self._prop is None else self.parent.prop if self.parent else None
|
||||
|
||||
@property
|
||||
def raw(self):
|
||||
"""Return the raw information (original match from the string,
|
||||
not the cleaned version) associated with the given property name."""
|
||||
if self.input and self.span:
|
||||
return self.input[self.span[0]:self.span[1]]
|
||||
return None
|
||||
|
||||
def __repr__(self, *args, **kwargs):
|
||||
return object.__repr__(self, *args, **kwargs)
|
||||
|
||||
|
||||
def _split_kwargs(**kwargs):
|
||||
metadata_args = {}
|
||||
for prop in dir(GuessMetadata):
|
||||
try:
|
||||
metadata_args[prop] = kwargs.pop(prop)
|
||||
except KeyError:
|
||||
pass
|
||||
return metadata_args, kwargs
|
||||
|
||||
|
||||
class Guess(UnicodeMixin, dict):
|
||||
"""A Guess is a dictionary which has an associated confidence for each of
|
||||
its values.
|
||||
|
@ -133,98 +37,91 @@ class Guess(UnicodeMixin, dict):
|
|||
simple dict."""
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
metadata_kwargs, kwargs = _split_kwargs(**kwargs)
|
||||
self._global_metadata = GuessMetadata(**metadata_kwargs)
|
||||
try:
|
||||
confidence = kwargs.pop('confidence')
|
||||
except KeyError:
|
||||
confidence = 0
|
||||
|
||||
try:
|
||||
raw = kwargs.pop('raw')
|
||||
except KeyError:
|
||||
raw = None
|
||||
|
||||
dict.__init__(self, *args, **kwargs)
|
||||
|
||||
self._metadata = {}
|
||||
self._confidence = {}
|
||||
self._raw = {}
|
||||
for prop in self:
|
||||
self._metadata[prop] = GuessMetadata(parent=self._global_metadata)
|
||||
|
||||
self._confidence[prop] = confidence
|
||||
self._raw[prop] = raw
|
||||
|
||||
def to_dict(self, advanced=False):
|
||||
"""Return the guess as a dict containing only base types, ie:
|
||||
where dates, languages, countries, etc. are converted to strings.
|
||||
|
||||
if advanced is True, return the data as a json string containing
|
||||
also the raw information of the properties."""
|
||||
data = dict(self)
|
||||
for prop, value in data.items():
|
||||
if isinstance(value, datetime.date):
|
||||
data[prop] = value.isoformat()
|
||||
elif isinstance(value, (UnicodeMixin, base_text_type)):
|
||||
elif isinstance(value, (Language, Country, base_text_type)):
|
||||
data[prop] = u(value)
|
||||
elif isinstance(value, list):
|
||||
data[prop] = [u(x) for x in value]
|
||||
if advanced:
|
||||
metadata = self.metadata(prop)
|
||||
prop_data = {'value': data[prop]}
|
||||
if metadata.raw:
|
||||
prop_data['raw'] = metadata.raw
|
||||
if metadata.confidence:
|
||||
prop_data['confidence'] = metadata.confidence
|
||||
data[prop] = prop_data
|
||||
data[prop] = {"value": data[prop], "raw": self.raw(prop), "confidence": self.confidence(prop)}
|
||||
|
||||
return data
|
||||
|
||||
def nice_string(self, advanced=False):
|
||||
"""Return a string with the property names and their values,
|
||||
that also displays the associated confidence to each property.
|
||||
|
||||
FIXME: doc with param"""
|
||||
if advanced:
|
||||
data = self.to_dict(advanced)
|
||||
return json.dumps(data, indent=4)
|
||||
else:
|
||||
else:
|
||||
data = self.to_dict()
|
||||
|
||||
|
||||
parts = json.dumps(data, indent=4).split('\n')
|
||||
for i, p in enumerate(parts):
|
||||
if p[:5] != ' "':
|
||||
continue
|
||||
|
||||
|
||||
prop = p.split('"')[1]
|
||||
parts[i] = (' [%.2f] "' % self.confidence(prop)) + p[5:]
|
||||
|
||||
|
||||
return '\n'.join(parts)
|
||||
|
||||
def __unicode__(self):
|
||||
return u(self.to_dict())
|
||||
|
||||
def metadata(self, prop=None):
|
||||
"""Return the metadata associated with the given property name
|
||||
|
||||
If no property name is given, get the global_metadata
|
||||
"""
|
||||
if prop is None:
|
||||
return self._global_metadata
|
||||
if not prop in self._metadata:
|
||||
self._metadata[prop] = GuessMetadata(parent=self._global_metadata)
|
||||
return self._metadata[prop]
|
||||
|
||||
def confidence(self, prop=None):
|
||||
return self.metadata(prop).confidence
|
||||
|
||||
def set_confidence(self, prop, confidence):
|
||||
self.metadata(prop).confidence = confidence
|
||||
|
||||
def confidence(self, prop):
|
||||
return self._confidence.get(prop, -1)
|
||||
|
||||
def raw(self, prop):
|
||||
return self.metadata(prop).raw
|
||||
return self._raw.get(prop, None)
|
||||
|
||||
def set(self, prop_name, value, *args, **kwargs):
|
||||
self[prop_name] = value
|
||||
self._metadata[prop_name] = GuessMetadata(parent=self._global_metadata, *args, **kwargs)
|
||||
def set(self, prop, value, confidence=None, raw=None):
|
||||
self[prop] = value
|
||||
if confidence is not None:
|
||||
self._confidence[prop] = confidence
|
||||
if raw is not None:
|
||||
self._raw[prop] = raw
|
||||
|
||||
def update(self, other, confidence=None):
|
||||
def set_confidence(self, prop, value):
|
||||
self._confidence[prop] = value
|
||||
|
||||
def set_raw(self, prop, value):
|
||||
self._raw[prop] = value
|
||||
|
||||
def update(self, other, confidence=None, raw=None):
|
||||
dict.update(self, other)
|
||||
if isinstance(other, Guess):
|
||||
for prop in other:
|
||||
try:
|
||||
self._metadata[prop] = other._metadata[prop]
|
||||
except KeyError:
|
||||
pass
|
||||
if not confidence is None:
|
||||
self._confidence[prop] = other.confidence(prop)
|
||||
self._raw[prop] = other.raw(prop)
|
||||
|
||||
if confidence is not None:
|
||||
for prop in other:
|
||||
self.set_confidence(prop, confidence)
|
||||
self._confidence[prop] = confidence
|
||||
|
||||
if raw is not None:
|
||||
for prop in other:
|
||||
self._raw[prop] = raw
|
||||
|
||||
def update_highest_confidence(self, other):
|
||||
"""Update this guess with the values from the given one. In case
|
||||
|
@ -234,16 +131,17 @@ class Guess(UnicodeMixin, dict):
|
|||
raise ValueError('Can only call this function on Guess instances')
|
||||
|
||||
for prop in other:
|
||||
if prop in self and self.metadata(prop).confidence >= other.metadata(prop).confidence:
|
||||
if prop in self and self.confidence(prop) >= other.confidence(prop):
|
||||
continue
|
||||
self[prop] = other[prop]
|
||||
self._metadata[prop] = other.metadata(prop)
|
||||
self._confidence[prop] = other.confidence(prop)
|
||||
self._raw[prop] = other.raw(prop)
|
||||
|
||||
|
||||
def choose_int(g1, g2):
|
||||
"""Function used by merge_similar_guesses to choose between 2 possible
|
||||
properties when they are integers."""
|
||||
v1, c1 = g1 # value, confidence
|
||||
v1, c1 = g1 # value, confidence
|
||||
v2, c2 = g2
|
||||
if (v1 == v2):
|
||||
return (v1, 1 - (1 - c1) * (1 - c2))
|
||||
|
@ -281,7 +179,7 @@ def choose_string(g1, g2):
|
|||
('The Simpsons', 0.75)
|
||||
|
||||
"""
|
||||
v1, c1 = g1 # value, confidence
|
||||
v1, c1 = g1 # value, confidence
|
||||
v2, c2 = g2
|
||||
|
||||
if not v1:
|
||||
|
@ -388,48 +286,43 @@ def merge_all(guesses, append=None):
|
|||
instead of being merged.
|
||||
|
||||
>>> s(merge_all([ Guess({'season': 2}, confidence=0.6),
|
||||
... Guess({'episodeNumber': 13}, confidence=0.8) ])
|
||||
... ) == {'season': 2, 'episodeNumber': 13}
|
||||
True
|
||||
|
||||
... Guess({'episodeNumber': 13}, confidence=0.8) ]))
|
||||
{'season': 2, 'episodeNumber': 13}
|
||||
|
||||
>>> s(merge_all([ Guess({'episodeNumber': 27}, confidence=0.02),
|
||||
... Guess({'season': 1}, confidence=0.2) ])
|
||||
... ) == {'season': 1}
|
||||
True
|
||||
... Guess({'season': 1}, confidence=0.2) ]))
|
||||
{'season': 1}
|
||||
|
||||
>>> s(merge_all([ Guess({'other': 'PROPER'}, confidence=0.8),
|
||||
... Guess({'releaseGroup': '2HD'}, confidence=0.8) ],
|
||||
... append=['other'])
|
||||
... ) == {'releaseGroup': '2HD', 'other': ['PROPER']}
|
||||
True
|
||||
... append=['other']))
|
||||
{'releaseGroup': '2HD', 'other': ['PROPER']}
|
||||
|
||||
|
||||
"""
|
||||
result = Guess()
|
||||
if not guesses:
|
||||
return result
|
||||
return Guess()
|
||||
|
||||
result = guesses[0]
|
||||
if append is None:
|
||||
append = []
|
||||
|
||||
for g in guesses:
|
||||
for g in guesses[1:]:
|
||||
# first append our appendable properties
|
||||
for prop in append:
|
||||
if prop in g:
|
||||
result.set(prop, result.get(prop, []) + [g[prop]],
|
||||
# TODO: what to do with confidence here? maybe an
|
||||
# arithmetic mean...
|
||||
confidence=g.metadata(prop).confidence,
|
||||
input=g.metadata(prop).input,
|
||||
span=g.metadata(prop).span,
|
||||
prop=g.metadata(prop).prop)
|
||||
confidence=g.confidence(prop),
|
||||
raw=g.raw(prop))
|
||||
|
||||
del g[prop]
|
||||
|
||||
# then merge the remaining ones
|
||||
dups = set(result) & set(g)
|
||||
if dups:
|
||||
log.warning('duplicate properties %s in merged result...' % [(result[p], g[p]) for p in dups])
|
||||
log.warning('duplicate properties %s in merged result...' % [ (result[p], g[p]) for p in dups] )
|
||||
|
||||
result.update_highest_confidence(g)
|
||||
|
||||
|
@ -445,7 +338,7 @@ def merge_all(guesses, append=None):
|
|||
if isinstance(value, list):
|
||||
result[prop] = list(set(value))
|
||||
else:
|
||||
result[prop] = [value]
|
||||
result[prop] = [ value ]
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
|
@ -18,8 +18,7 @@
|
|||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit import s, to_hex
|
||||
import hashlib
|
||||
import os.path
|
||||
|
@ -28,9 +27,8 @@ import os.path
|
|||
def hash_file(filename):
|
||||
"""Returns the ed2k hash of a given file.
|
||||
|
||||
>>> testfile = os.path.join(os.path.dirname(__file__), 'test/dummy.srt')
|
||||
>>> s(hash_file(testfile))
|
||||
'ed2k://|file|dummy.srt|59|41F58B913AB3973F593BEBA8B8DF6510|/'
|
||||
>>> s(hash_file('tests/dummy.srt'))
|
||||
'ed2k://|file|dummy.srt|44|1CA0B9DED3473B926AA93A0A546138BB|/'
|
||||
"""
|
||||
return 'ed2k://|file|%s|%d|%s|/' % (os.path.basename(filename),
|
||||
os.path.getsize(filename),
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
|
@ -18,8 +18,7 @@
|
|||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from __future__ import unicode_literals
|
||||
import struct
|
||||
import os
|
||||
|
||||
|
@ -29,7 +28,7 @@ def hash_file(filename):
|
|||
http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes
|
||||
and is licensed under the GPL."""
|
||||
|
||||
longlongformat = b'q' # long long
|
||||
longlongformat = 'q' # long long
|
||||
bytesize = struct.calcsize(longlongformat)
|
||||
|
||||
f = open(filename, "rb")
|
||||
|
@ -40,14 +39,14 @@ def hash_file(filename):
|
|||
if filesize < 65536 * 2:
|
||||
raise Exception("SizeError: size is %d, should be > 132K..." % filesize)
|
||||
|
||||
for x in range(int(65536 / bytesize)):
|
||||
for x in range(65536 / bytesize):
|
||||
buf = f.read(bytesize)
|
||||
(l_value,) = struct.unpack(longlongformat, buf)
|
||||
hash_value += l_value
|
||||
hash_value = hash_value & 0xFFFFFFFFFFFFFFFF # to remain as 64bit number
|
||||
hash_value = hash_value & 0xFFFFFFFFFFFFFFFF #to remain as 64bit number
|
||||
|
||||
f.seek(max(0, filesize - 65536), 0)
|
||||
for x in range(int(65536 / bytesize)):
|
||||
for x in range(65536 / bytesize):
|
||||
buf = f.read(bytesize)
|
||||
(l_value,) = struct.unpack(longlongformat, buf)
|
||||
hash_value += l_value
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
|
@ -18,143 +18,122 @@
|
|||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit import UnicodeMixin, base_text_type, u
|
||||
from __future__ import unicode_literals
|
||||
from guessit import UnicodeMixin, base_text_type, u, s
|
||||
from guessit.fileutils import load_file_in_same_dir
|
||||
from guessit.textutils import find_words
|
||||
from babelfish import Language
|
||||
import babelfish
|
||||
from guessit.country import Country
|
||||
import re
|
||||
import logging
|
||||
from guessit.guess import Guess
|
||||
|
||||
__all__ = ['Language', 'UNDETERMINED',
|
||||
'search_language', 'guess_language']
|
||||
__all__ = [ 'is_iso_language', 'is_language', 'lang_set', 'Language',
|
||||
'ALL_LANGUAGES', 'ALL_LANGUAGES_NAMES', 'UNDETERMINED',
|
||||
'search_language', 'guess_language' ]
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
UNDETERMINED = babelfish.Language('und')
|
||||
|
||||
SYN = {('und', None): ['unknown', 'inconnu', 'unk', 'un'],
|
||||
('ell', None): ['gr', 'greek'],
|
||||
('spa', None): ['esp', 'español'],
|
||||
('fra', None): ['français', 'vf', 'vff', 'vfi'],
|
||||
('swe', None): ['se'],
|
||||
('por', 'BR'): ['po', 'pb', 'pob', 'br', 'brazilian'],
|
||||
('cat', None): ['català'],
|
||||
('ces', None): ['cz'],
|
||||
('ukr', None): ['ua'],
|
||||
('zho', None): ['cn'],
|
||||
('jpn', None): ['jp'],
|
||||
('hrv', None): ['scr'],
|
||||
('mul', None): ['multi', 'dl'], # http://scenelingo.wordpress.com/2009/03/24/what-does-dl-mean/
|
||||
}
|
||||
# downloaded from http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
|
||||
#
|
||||
# Description of the fields:
|
||||
# "An alpha-3 (bibliographic) code, an alpha-3 (terminologic) code (when given),
|
||||
# an alpha-2 code (when given), an English name, and a French name of a language
|
||||
# are all separated by pipe (|) characters."
|
||||
_iso639_contents = load_file_in_same_dir(__file__, 'ISO-639-2_utf-8.txt')
|
||||
|
||||
# drop the BOM from the beginning of the file
|
||||
_iso639_contents = _iso639_contents[1:]
|
||||
|
||||
language_matrix = [ l.strip().split('|')
|
||||
for l in _iso639_contents.strip().split('\n') ]
|
||||
|
||||
|
||||
class GuessitConverter(babelfish.LanguageReverseConverter):
|
||||
# update information in the language matrix
|
||||
language_matrix += [['mol', '', 'mo', 'Moldavian', 'moldave'],
|
||||
['ass', '', '', 'Assyrian', 'assyrien']]
|
||||
|
||||
_with_country_regexp = re.compile('(.*)\((.*)\)')
|
||||
_with_country_regexp2 = re.compile('(.*)-(.*)')
|
||||
|
||||
def __init__(self):
|
||||
self.guessit_exceptions = {}
|
||||
for (alpha3, country), synlist in SYN.items():
|
||||
for syn in synlist:
|
||||
self.guessit_exceptions[syn.lower()] = (alpha3, country, None)
|
||||
|
||||
@property
|
||||
def codes(self):
|
||||
return (babelfish.language_converters['alpha3b'].codes |
|
||||
babelfish.language_converters['alpha2'].codes |
|
||||
babelfish.language_converters['name'].codes |
|
||||
babelfish.language_converters['opensubtitles'].codes |
|
||||
babelfish.country_converters['name'].codes |
|
||||
frozenset(self.guessit_exceptions.keys()))
|
||||
|
||||
def convert(self, alpha3, country=None, script=None):
|
||||
return str(babelfish.Language(alpha3, country, script))
|
||||
|
||||
def reverse(self, name):
|
||||
with_country = (GuessitConverter._with_country_regexp.match(name) or
|
||||
GuessitConverter._with_country_regexp2.match(name))
|
||||
|
||||
if with_country:
|
||||
lang = babelfish.Language.fromguessit(with_country.group(1).strip())
|
||||
lang.country = babelfish.Country.fromguessit(with_country.group(2).strip())
|
||||
return (lang.alpha3, lang.country.alpha2 if lang.country else None, lang.script or None)
|
||||
|
||||
# exceptions come first, as they need to override a potential match
|
||||
# with any of the other guessers
|
||||
try:
|
||||
return self.guessit_exceptions[name.lower()]
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
for conv in [babelfish.Language,
|
||||
babelfish.Language.fromalpha3b,
|
||||
babelfish.Language.fromalpha2,
|
||||
babelfish.Language.fromname,
|
||||
babelfish.Language.fromopensubtitles]:
|
||||
try:
|
||||
c = conv(name)
|
||||
return c.alpha3, c.country, c.script
|
||||
except (ValueError, babelfish.LanguageReverseError):
|
||||
pass
|
||||
|
||||
raise babelfish.LanguageReverseError(name)
|
||||
for lang in language_matrix:
|
||||
# remove unused languages that shadow other common ones with a non-official form
|
||||
if (lang[2] == 'se' or # Northern Sami shadows Swedish
|
||||
lang[2] == 'br'): # Breton shadows Brazilian
|
||||
lang[2] = ''
|
||||
# add missing information
|
||||
if lang[0] == 'und':
|
||||
lang[2] = 'un'
|
||||
if lang[0] == 'srp':
|
||||
lang[1] = 'scc' # from OpenSubtitles
|
||||
|
||||
|
||||
babelfish.language_converters['guessit'] = GuessitConverter()
|
||||
lng3 = frozenset(l[0] for l in language_matrix if l[0])
|
||||
lng3term = frozenset(l[1] for l in language_matrix if l[1])
|
||||
lng2 = frozenset(l[2] for l in language_matrix if l[2])
|
||||
lng_en_name = frozenset(lng for l in language_matrix
|
||||
for lng in l[3].lower().split('; ') if lng)
|
||||
lng_fr_name = frozenset(lng for l in language_matrix
|
||||
for lng in l[4].lower().split('; ') if lng)
|
||||
lng_all_names = lng3 | lng3term | lng2 | lng_en_name | lng_fr_name
|
||||
|
||||
COUNTRIES_SYN = {'ES': ['españa'],
|
||||
'GB': ['UK'],
|
||||
'BR': ['brazilian', 'bra'],
|
||||
# FIXME: this one is a bit of a stretch, not sure how to do
|
||||
# it properly, though...
|
||||
'MX': ['Latinoamérica', 'latin america']
|
||||
}
|
||||
lng3_to_lng3term = dict((l[0], l[1]) for l in language_matrix if l[1])
|
||||
lng3term_to_lng3 = dict((l[1], l[0]) for l in language_matrix if l[1])
|
||||
|
||||
lng3_to_lng2 = dict((l[0], l[2]) for l in language_matrix if l[2])
|
||||
lng2_to_lng3 = dict((l[2], l[0]) for l in language_matrix if l[2])
|
||||
|
||||
# we only return the first given english name, hoping it is the most used one
|
||||
lng3_to_lng_en_name = dict((l[0], l[3].split('; ')[0])
|
||||
for l in language_matrix if l[3])
|
||||
lng_en_name_to_lng3 = dict((en_name.lower(), l[0])
|
||||
for l in language_matrix if l[3]
|
||||
for en_name in l[3].split('; '))
|
||||
|
||||
# we only return the first given french name, hoping it is the most used one
|
||||
lng3_to_lng_fr_name = dict((l[0], l[4].split('; ')[0])
|
||||
for l in language_matrix if l[4])
|
||||
lng_fr_name_to_lng3 = dict((fr_name.lower(), l[0])
|
||||
for l in language_matrix if l[4]
|
||||
for fr_name in l[4].split('; '))
|
||||
|
||||
# contains a list of exceptions: strings that should be parsed as a language
|
||||
# but which are not in an ISO form
|
||||
lng_exceptions = { 'unknown': ('und', None),
|
||||
'inconnu': ('und', None),
|
||||
'unk': ('und', None),
|
||||
'un': ('und', None),
|
||||
'gr': ('gre', None),
|
||||
'greek': ('gre', None),
|
||||
'esp': ('spa', None),
|
||||
'español': ('spa', None),
|
||||
'se': ('swe', None),
|
||||
'po': ('pt', 'br'),
|
||||
'pb': ('pt', 'br'),
|
||||
'pob': ('pt', 'br'),
|
||||
'br': ('pt', 'br'),
|
||||
'brazilian': ('pt', 'br'),
|
||||
'català': ('cat', None),
|
||||
'cz': ('cze', None),
|
||||
'ua': ('ukr', None),
|
||||
'cn': ('chi', None),
|
||||
'chs': ('chi', None),
|
||||
'jp': ('jpn', None),
|
||||
'scr': ('hrv', None)
|
||||
}
|
||||
|
||||
|
||||
class GuessitCountryConverter(babelfish.CountryReverseConverter):
|
||||
def __init__(self):
|
||||
self.guessit_exceptions = {}
|
||||
def is_iso_language(language):
|
||||
return language.lower() in lng_all_names
|
||||
|
||||
for alpha2, synlist in COUNTRIES_SYN.items():
|
||||
for syn in synlist:
|
||||
self.guessit_exceptions[syn.lower()] = alpha2
|
||||
def is_language(language):
|
||||
return is_iso_language(language) or language in lng_exceptions
|
||||
|
||||
@property
|
||||
def codes(self):
|
||||
return (babelfish.country_converters['name'].codes |
|
||||
frozenset(babelfish.COUNTRIES.values()) |
|
||||
frozenset(self.guessit_exceptions.keys()))
|
||||
def lang_set(languages, strict=False):
|
||||
"""Return a set of guessit.Language created from their given string
|
||||
representation.
|
||||
|
||||
def convert(self, alpha2):
|
||||
return str(babelfish.Country(alpha2))
|
||||
|
||||
def reverse(self, name):
|
||||
# exceptions come first, as they need to override a potential match
|
||||
# with any of the other guessers
|
||||
try:
|
||||
return self.guessit_exceptions[name.lower()]
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
try:
|
||||
return babelfish.Country(name.upper()).alpha2
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
for conv in [babelfish.Country.fromname]:
|
||||
try:
|
||||
return conv(name).alpha2
|
||||
except babelfish.CountryReverseError:
|
||||
pass
|
||||
|
||||
raise babelfish.CountryReverseError(name)
|
||||
|
||||
|
||||
babelfish.country_converters['guessit'] = GuessitCountryConverter()
|
||||
if strict is True, then this will raise an exception if any language
|
||||
could not be identified.
|
||||
"""
|
||||
return set(Language(l, strict=strict) for l in languages)
|
||||
|
||||
|
||||
class Language(UnicodeMixin):
|
||||
|
@ -174,65 +153,109 @@ class Language(UnicodeMixin):
|
|||
>>> Language('fr')
|
||||
Language(French)
|
||||
|
||||
>>> (Language('eng').english_name) == 'English'
|
||||
>>> s(Language('eng').french_name)
|
||||
'anglais'
|
||||
|
||||
>>> s(Language('pt(br)').country.english_name)
|
||||
'Brazil'
|
||||
|
||||
>>> s(Language('Español (Latinoamérica)').country.english_name)
|
||||
'Latin America'
|
||||
|
||||
>>> Language('Spanish (Latin America)') == Language('Español (Latinoamérica)')
|
||||
True
|
||||
|
||||
>>> (Language('pt(br)').country.name) == 'BRAZIL'
|
||||
True
|
||||
>>> s(Language('zz', strict=False).english_name)
|
||||
'Undetermined'
|
||||
|
||||
>>> (Language('zz', strict=False).english_name) == 'Undetermined'
|
||||
True
|
||||
|
||||
>>> (Language('pt(br)').opensubtitles) == 'pob'
|
||||
True
|
||||
>>> s(Language('pt(br)').opensubtitles)
|
||||
'pob'
|
||||
"""
|
||||
|
||||
def __init__(self, language, country=None, strict=False):
|
||||
_with_country_regexp = re.compile('(.*)\((.*)\)')
|
||||
_with_country_regexp2 = re.compile('(.*)-(.*)')
|
||||
|
||||
def __init__(self, language, country=None, strict=False, scheme=None):
|
||||
language = u(language.strip().lower())
|
||||
country = babelfish.Country(country.upper()) if country else None
|
||||
with_country = (Language._with_country_regexp.match(language) or
|
||||
Language._with_country_regexp2.match(language))
|
||||
if with_country:
|
||||
self.lang = Language(with_country.group(1)).lang
|
||||
self.country = Country(with_country.group(2))
|
||||
return
|
||||
|
||||
try:
|
||||
self.lang = babelfish.Language.fromguessit(language)
|
||||
# user given country overrides guessed one
|
||||
if country:
|
||||
self.lang.country = country
|
||||
self.lang = None
|
||||
self.country = Country(country) if country else None
|
||||
|
||||
except babelfish.LanguageReverseError:
|
||||
msg = 'The given string "%s" could not be identified as a language' % language
|
||||
if strict:
|
||||
raise ValueError(msg)
|
||||
# first look for scheme specific languages
|
||||
if scheme == 'opensubtitles':
|
||||
if language == 'br':
|
||||
self.lang = 'bre'
|
||||
return
|
||||
elif language == 'se':
|
||||
self.lang = 'sme'
|
||||
return
|
||||
elif scheme is not None:
|
||||
log.warning('Unrecognized scheme: "%s" - Proceeding with standard one' % scheme)
|
||||
|
||||
# look for ISO language codes
|
||||
if len(language) == 2:
|
||||
self.lang = lng2_to_lng3.get(language)
|
||||
elif len(language) == 3:
|
||||
self.lang = (language
|
||||
if language in lng3
|
||||
else lng3term_to_lng3.get(language))
|
||||
else:
|
||||
self.lang = (lng_en_name_to_lng3.get(language) or
|
||||
lng_fr_name_to_lng3.get(language))
|
||||
|
||||
# general language exceptions
|
||||
if self.lang is None and language in lng_exceptions:
|
||||
lang, country = lng_exceptions[language]
|
||||
self.lang = Language(lang).alpha3
|
||||
self.country = Country(country) if country else None
|
||||
|
||||
msg = 'The given string "%s" could not be identified as a language' % language
|
||||
|
||||
if self.lang is None and strict:
|
||||
raise ValueError(msg)
|
||||
|
||||
if self.lang is None:
|
||||
log.debug(msg)
|
||||
self.lang = UNDETERMINED
|
||||
|
||||
@property
|
||||
def country(self):
|
||||
return self.lang.country
|
||||
self.lang = 'und'
|
||||
|
||||
@property
|
||||
def alpha2(self):
|
||||
return self.lang.alpha2
|
||||
return lng3_to_lng2[self.lang]
|
||||
|
||||
@property
|
||||
def alpha3(self):
|
||||
return self.lang.alpha3
|
||||
return self.lang
|
||||
|
||||
@property
|
||||
def alpha3term(self):
|
||||
return self.lang.alpha3b
|
||||
return lng3_to_lng3term[self.lang]
|
||||
|
||||
@property
|
||||
def english_name(self):
|
||||
return self.lang.name
|
||||
return lng3_to_lng_en_name[self.lang]
|
||||
|
||||
@property
|
||||
def french_name(self):
|
||||
return lng3_to_lng_fr_name[self.lang]
|
||||
|
||||
@property
|
||||
def opensubtitles(self):
|
||||
return self.lang.opensubtitles
|
||||
if self.lang == 'por' and self.country and self.country.alpha2 == 'br':
|
||||
return 'pob'
|
||||
elif self.lang in ['gre', 'srp']:
|
||||
return self.alpha3term
|
||||
return self.alpha3
|
||||
|
||||
@property
|
||||
def tmdb(self):
|
||||
if self.country:
|
||||
return '%s-%s' % (self.alpha2, self.country.alpha2)
|
||||
return '%s-%s' % (self.alpha2, self.country.alpha2.upper())
|
||||
return self.alpha2
|
||||
|
||||
def __hash__(self):
|
||||
|
@ -240,8 +263,7 @@ class Language(UnicodeMixin):
|
|||
|
||||
def __eq__(self, other):
|
||||
if isinstance(other, Language):
|
||||
# in Guessit, languages are considered equal if their main languages are equal
|
||||
return self.alpha3 == other.alpha3
|
||||
return self.lang == other.lang
|
||||
|
||||
if isinstance(other, base_text_type):
|
||||
try:
|
||||
|
@ -254,138 +276,115 @@ class Language(UnicodeMixin):
|
|||
def __ne__(self, other):
|
||||
return not self == other
|
||||
|
||||
def __bool__(self):
|
||||
return self.lang != UNDETERMINED
|
||||
__nonzero__ = __bool__
|
||||
def __nonzero__(self):
|
||||
return self.lang != 'und'
|
||||
|
||||
def __unicode__(self):
|
||||
if self.lang.country:
|
||||
if self.country:
|
||||
return '%s(%s)' % (self.english_name, self.country.alpha2)
|
||||
else:
|
||||
return self.english_name
|
||||
|
||||
def __repr__(self):
|
||||
if self.lang.country:
|
||||
return 'Language(%s, country=%s)' % (self.english_name, self.lang.country)
|
||||
if self.country:
|
||||
return 'Language(%s, country=%s)' % (self.english_name, self.country)
|
||||
else:
|
||||
return 'Language(%s)' % self.english_name
|
||||
|
||||
|
||||
# list of common words which could be interpreted as languages, but which
|
||||
# are far too common to be able to say they represent a language in the
|
||||
# middle of a string (where they most likely carry their commmon meaning)
|
||||
LNG_COMMON_WORDS = frozenset([
|
||||
# english words
|
||||
'is', 'it', 'am', 'mad', 'men', 'man', 'run', 'sin', 'st', 'to',
|
||||
'no', 'non', 'war', 'min', 'new', 'car', 'day', 'bad', 'bat', 'fan',
|
||||
'fry', 'cop', 'zen', 'gay', 'fat', 'one', 'cherokee', 'got', 'an', 'as',
|
||||
'cat', 'her', 'be', 'hat', 'sun', 'may', 'my', 'mr', 'rum', 'pi',
|
||||
# french words
|
||||
'bas', 'de', 'le', 'son', 'ne', 'ca', 'ce', 'et', 'que',
|
||||
'mal', 'est', 'vol', 'or', 'mon', 'se',
|
||||
# spanish words
|
||||
'la', 'el', 'del', 'por', 'mar',
|
||||
# other
|
||||
'ind', 'arw', 'ts', 'ii', 'bin', 'chan', 'ss', 'san', 'oss', 'iii',
|
||||
'vi', 'ben', 'da', 'lt', 'ch',
|
||||
# new from babelfish
|
||||
'mkv', 'avi', 'dmd', 'the', 'dis', 'cut', 'stv', 'des', 'dia', 'and',
|
||||
'cab', 'sub', 'mia', 'rim', 'las', 'une', 'par', 'srt', 'ano', 'toy',
|
||||
'job', 'gag', 'reel', 'www', 'for', 'ayu', 'csi', 'ren', 'moi', 'sur',
|
||||
'fer', 'fun', 'two', 'big', 'psy', 'air',
|
||||
# release groups
|
||||
'bs' # Bosnian
|
||||
])
|
||||
UNDETERMINED = Language('und')
|
||||
ALL_LANGUAGES = frozenset(Language(lng) for lng in lng_all_names) - frozenset([UNDETERMINED])
|
||||
ALL_LANGUAGES_NAMES = lng_all_names
|
||||
|
||||
|
||||
subtitle_prefixes = ['sub', 'subs', 'st', 'vost', 'subforced', 'fansub', 'hardsub']
|
||||
subtitle_suffixes = ['subforced', 'fansub', 'hardsub']
|
||||
lang_prefixes = ['true']
|
||||
|
||||
|
||||
def find_possible_languages(string):
|
||||
"""Find possible languages in the string
|
||||
|
||||
:return: list of tuple (property, Language, lang_word, word)
|
||||
"""
|
||||
words = find_words(string)
|
||||
|
||||
valid_words = []
|
||||
for word in words:
|
||||
lang_word = word.lower()
|
||||
key = 'language'
|
||||
for prefix in subtitle_prefixes:
|
||||
if lang_word.startswith(prefix):
|
||||
lang_word = lang_word[len(prefix):]
|
||||
key = 'subtitleLanguage'
|
||||
for suffix in subtitle_suffixes:
|
||||
if lang_word.endswith(suffix):
|
||||
lang_word = lang_word[:len(suffix)]
|
||||
key = 'subtitleLanguage'
|
||||
for prefix in lang_prefixes:
|
||||
if lang_word.startswith(prefix):
|
||||
lang_word = lang_word[len(prefix):]
|
||||
if not lang_word in LNG_COMMON_WORDS:
|
||||
try:
|
||||
lang = Language(lang_word)
|
||||
# Keep language with alpha2 equilavent. Others are probably an uncommon language.
|
||||
if lang == 'mul' or hasattr(lang, 'alpha2'):
|
||||
valid_words.append((key, lang, lang_word, word))
|
||||
except babelfish.Error:
|
||||
pass
|
||||
return valid_words
|
||||
|
||||
|
||||
def search_language(string, lang_filter=None):
|
||||
def search_language(string, lang_filter=None, skip=None):
|
||||
"""Looks for language patterns, and if found return the language object,
|
||||
its group span and an associated confidence.
|
||||
|
||||
you can specify a list of allowed languages using the lang_filter argument,
|
||||
as in lang_filter = [ 'fr', 'eng', 'spanish' ]
|
||||
|
||||
>>> search_language('movie [en].avi')['language']
|
||||
Language(English)
|
||||
>>> search_language('movie [en].avi')
|
||||
(Language(English), (7, 9), 0.8)
|
||||
|
||||
>>> search_language('the zen fat cat and the gay mad men got a new fan', lang_filter = ['en', 'fr', 'es'])
|
||||
|
||||
(None, None, None)
|
||||
"""
|
||||
|
||||
# list of common words which could be interpreted as languages, but which
|
||||
# are far too common to be able to say they represent a language in the
|
||||
# middle of a string (where they most likely carry their commmon meaning)
|
||||
lng_common_words = frozenset([
|
||||
# english words
|
||||
'is', 'it', 'am', 'mad', 'men', 'man', 'run', 'sin', 'st', 'to',
|
||||
'no', 'non', 'war', 'min', 'new', 'car', 'day', 'bad', 'bat', 'fan',
|
||||
'fry', 'cop', 'zen', 'gay', 'fat', 'cherokee', 'got', 'an', 'as',
|
||||
'cat', 'her', 'be', 'hat', 'sun', 'may', 'my', 'mr', 'rum', 'pi',
|
||||
# french words
|
||||
'bas', 'de', 'le', 'son', 'vo', 'vf', 'ne', 'ca', 'ce', 'et', 'que',
|
||||
'mal', 'est', 'vol', 'or', 'mon', 'se',
|
||||
# spanish words
|
||||
'la', 'el', 'del', 'por', 'mar',
|
||||
# other
|
||||
'ind', 'arw', 'ts', 'ii', 'bin', 'chan', 'ss', 'san', 'oss', 'iii',
|
||||
'vi', 'ben', 'da', 'lt'
|
||||
])
|
||||
sep = r'[](){} \._-+'
|
||||
|
||||
if lang_filter:
|
||||
lang_filter = set(babelfish.Language.fromguessit(lang) for lang in lang_filter)
|
||||
lang_filter = lang_set(lang_filter)
|
||||
|
||||
confidence = 1.0 # for all of them
|
||||
slow = ' %s ' % string.lower()
|
||||
confidence = 1.0 # for all of them
|
||||
|
||||
for prop, language, lang, word in find_possible_languages(string):
|
||||
pos = string.find(word)
|
||||
end = pos + len(word)
|
||||
for lang in set(find_words(slow)) & lng_all_names:
|
||||
|
||||
if lang_filter and language not in lang_filter:
|
||||
if lang in lng_common_words:
|
||||
continue
|
||||
|
||||
# only allow those languages that have a 2-letter code, those that
|
||||
# don't are too esoteric and probably false matches
|
||||
#if language.lang not in lng3_to_lng2:
|
||||
# continue
|
||||
pos = slow.find(lang)
|
||||
|
||||
# confidence depends on alpha2, alpha3, english name, ...
|
||||
if len(lang) == 2:
|
||||
confidence = 0.8
|
||||
elif len(lang) == 3:
|
||||
confidence = 0.9
|
||||
elif prop == 'subtitleLanguage':
|
||||
confidence = 0.6 # Subtitle prefix found with language
|
||||
else:
|
||||
# Note: we could either be really confident that we found a
|
||||
# language or assume that full language names are too
|
||||
# common words and lower their confidence accordingly
|
||||
confidence = 0.3 # going with the low-confidence route here
|
||||
if pos != -1:
|
||||
end = pos + len(lang)
|
||||
|
||||
# skip if span in in skip list
|
||||
while skip and (pos - 1, end - 1) in skip:
|
||||
pos = slow.find(lang, end)
|
||||
if pos == -1:
|
||||
continue
|
||||
end = pos + len(lang)
|
||||
if pos == -1:
|
||||
continue
|
||||
|
||||
# make sure our word is always surrounded by separators
|
||||
if slow[pos - 1] not in sep or slow[end] not in sep:
|
||||
continue
|
||||
|
||||
return Guess({prop: language}, confidence=confidence, input=string, span=(pos, end))
|
||||
language = Language(slow[pos:end])
|
||||
if lang_filter and language not in lang_filter:
|
||||
continue
|
||||
|
||||
return None
|
||||
# only allow those languages that have a 2-letter code, those that
|
||||
# don't are too esoteric and probably false matches
|
||||
if language.lang not in lng3_to_lng2:
|
||||
continue
|
||||
|
||||
# confidence depends on lng2, lng3, english name, ...
|
||||
if len(lang) == 2:
|
||||
confidence = 0.8
|
||||
elif len(lang) == 3:
|
||||
confidence = 0.9
|
||||
else:
|
||||
# Note: we could either be really confident that we found a
|
||||
# language or assume that full language names are too
|
||||
# common words and lower their confidence accordingly
|
||||
confidence = 0.3 # going with the low-confidence route here
|
||||
|
||||
return language, (pos - 1, end - 1), confidence
|
||||
|
||||
return None, None, None
|
||||
|
||||
|
||||
def guess_language(text): # pragma: no cover
|
||||
def guess_language(text):
|
||||
"""Guess the language in which a body of text is written.
|
||||
|
||||
This uses the external guess-language python module, and will fail and return
|
||||
|
@ -393,7 +392,7 @@ def guess_language(text): # pragma: no cover
|
|||
"""
|
||||
try:
|
||||
from guess_language import guessLanguage
|
||||
return babelfish.Language.fromguessit(guessLanguage(text))
|
||||
return Language(guessLanguage(text))
|
||||
|
||||
except ImportError:
|
||||
log.error('Cannot detect the language of the given text body, missing dependency: guess-language')
|
||||
|
|
|
@ -2,8 +2,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
|
||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
|
@ -19,229 +18,163 @@
|
|||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, \
|
||||
unicode_literals
|
||||
|
||||
import logging
|
||||
|
||||
from guessit import PY3, u
|
||||
from guessit.transfo import TransformerException
|
||||
from __future__ import unicode_literals
|
||||
from guessit import PY3, u, base_text_type
|
||||
from guessit.matchtree import MatchTree
|
||||
from guessit.textutils import normalize_unicode, clean_string
|
||||
from guessit.guess import Guess
|
||||
import inspect
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class IterativeMatcher(object):
|
||||
"""An iterative matcher tries to match different patterns that appear
|
||||
in the filename.
|
||||
def __init__(self, filename, filetype='autodetect', opts=None, transfo_opts=None):
|
||||
"""An iterative matcher tries to match different patterns that appear
|
||||
in the filename.
|
||||
|
||||
The ``filetype`` argument indicates which type of file you want to match.
|
||||
If it is undefined, the matcher will try to see whether it can guess
|
||||
that the file corresponds to an episode, or otherwise will assume it is
|
||||
a movie.
|
||||
The 'filetype' argument indicates which type of file you want to match.
|
||||
If it is 'autodetect', the matcher will try to see whether it can guess
|
||||
that the file corresponds to an episode, or otherwise will assume it is
|
||||
a movie.
|
||||
|
||||
The recognized ``filetype`` values are:
|
||||
``['subtitle', 'info', 'movie', 'moviesubtitle', 'movieinfo', 'episode',
|
||||
'episodesubtitle', 'episodeinfo']``
|
||||
The recognized 'filetype' values are:
|
||||
[ autodetect, subtitle, info, movie, moviesubtitle, movieinfo, episode,
|
||||
episodesubtitle, episodeinfo ]
|
||||
|
||||
``options`` is a dict of options values to be passed to the transformations used
|
||||
by the matcher.
|
||||
|
||||
The IterativeMatcher works mainly in 2 steps:
|
||||
The IterativeMatcher works mainly in 2 steps:
|
||||
|
||||
First, it splits the filename into a match_tree, which is a tree of groups
|
||||
which have a semantic meaning, such as episode number, movie title,
|
||||
etc...
|
||||
First, it splits the filename into a match_tree, which is a tree of groups
|
||||
which have a semantic meaning, such as episode number, movie title,
|
||||
etc...
|
||||
|
||||
The match_tree created looks like the following::
|
||||
The match_tree created looks like the following:
|
||||
|
||||
0000000000000000000000000000000000000000000000000000000000000000000000000000000000 111
|
||||
0000011111111111112222222222222233333333444444444444444455555555666777777778888888 000
|
||||
0000000000000000000000000000000001111112011112222333333401123334000011233340000000 000
|
||||
__________________(The.Prestige).______.[____.HP.______.{__-___}.St{__-___}.Chaps].___
|
||||
xxxxxttttttttttttt ffffff vvvv xxxxxx ll lll xx xxx ccc
|
||||
[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv
|
||||
0000000000000000000000000000000000000000000000000000000000000000000000000000000000 111
|
||||
0000011111111111112222222222222233333333444444444444444455555555666777777778888888 000
|
||||
0000000000000000000000000000000001111112011112222333333401123334000011233340000000 000
|
||||
__________________(The.Prestige).______.[____.HP.______.{__-___}.St{__-___}.Chaps].___
|
||||
xxxxxttttttttttttt ffffff vvvv xxxxxx ll lll xx xxx ccc
|
||||
[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv
|
||||
|
||||
The first 3 lines indicates the group index in which a char in the
|
||||
filename is located. So for instance, ``x264`` (in the middle) is the group (0, 4, 1), and
|
||||
it corresponds to a video codec, denoted by the letter ``v`` in the 4th line.
|
||||
(for more info, see guess.matchtree.to_string)
|
||||
The first 3 lines indicates the group index in which a char in the
|
||||
filename is located. So for instance, x264 is the group (0, 4, 1), and
|
||||
it corresponds to a video codec, denoted by the letter'v' in the 4th line.
|
||||
(for more info, see guess.matchtree.to_string)
|
||||
|
||||
Second, it tries to merge all this information into a single object
|
||||
containing all the found properties, and does some (basic) conflict
|
||||
resolution when they arise.
|
||||
"""
|
||||
def __init__(self, filename, options=None, **kwargs):
|
||||
options = dict(options or {})
|
||||
for k, v in kwargs.items():
|
||||
if k not in options or not options[k]:
|
||||
options[k] = v # options dict has priority over keyword arguments
|
||||
self._validate_options(options)
|
||||
Second, it tries to merge all this information into a single object
|
||||
containing all the found properties, and does some (basic) conflict
|
||||
resolution when they arise.
|
||||
|
||||
|
||||
When you create the Matcher, you can pass it:
|
||||
- a list 'opts' of option names, that act as global flags
|
||||
- a dict 'transfo_opts' of { transfo_name: (transfo_args, transfo_kwargs) }
|
||||
with which to call the transfo.process() function.
|
||||
"""
|
||||
|
||||
valid_filetypes = ('autodetect', 'subtitle', 'info', 'video',
|
||||
'movie', 'moviesubtitle', 'movieinfo',
|
||||
'episode', 'episodesubtitle', 'episodeinfo')
|
||||
if filetype not in valid_filetypes:
|
||||
raise ValueError("filetype needs to be one of %s" % valid_filetypes)
|
||||
if not PY3 and not isinstance(filename, unicode):
|
||||
log.warning('Given filename to matcher is not unicode...')
|
||||
filename = filename.decode('utf-8')
|
||||
|
||||
filename = normalize_unicode(filename)
|
||||
|
||||
if opts is None:
|
||||
opts = []
|
||||
if not isinstance(opts, list):
|
||||
raise ValueError('opts must be a list of option names! Received: type=%s val=%s',
|
||||
type(opts), opts)
|
||||
|
||||
if transfo_opts is None:
|
||||
transfo_opts = {}
|
||||
if not isinstance(transfo_opts, dict):
|
||||
raise ValueError('transfo_opts must be a dict of { transfo_name: (args, kwargs) }. '+
|
||||
'Received: type=%s val=%s', type(transfo_opts), transfo_opts)
|
||||
|
||||
self.match_tree = MatchTree(filename)
|
||||
self.options = options
|
||||
self._transfo_calls = []
|
||||
|
||||
# sanity check: make sure we don't process a (mostly) empty string
|
||||
if clean_string(filename) == '':
|
||||
return
|
||||
|
||||
from guessit.plugins import transformers
|
||||
mtree = self.match_tree
|
||||
mtree.guess.set('type', filetype, confidence=1.0)
|
||||
|
||||
try:
|
||||
mtree = self.match_tree
|
||||
if 'type' in self.options:
|
||||
mtree.guess.set('type', self.options['type'], confidence=0.0)
|
||||
def apply_transfo(transfo_name, *args, **kwargs):
|
||||
transfo = __import__('guessit.transfo.' + transfo_name,
|
||||
globals=globals(), locals=locals(),
|
||||
fromlist=['process'], level=0)
|
||||
default_args, default_kwargs = transfo_opts.get(transfo_name, ((), {}))
|
||||
all_args = args or default_args
|
||||
all_kwargs = dict(default_kwargs)
|
||||
all_kwargs.update(kwargs) # keep all kwargs merged together
|
||||
transfo.process(mtree, *all_args, **all_kwargs)
|
||||
|
||||
# Process
|
||||
for transformer in transformers.all_transformers():
|
||||
self._process(transformer, False)
|
||||
# 1- first split our path into dirs + basename + ext
|
||||
apply_transfo('split_path_components')
|
||||
|
||||
# Post-process
|
||||
for transformer in transformers.all_transformers():
|
||||
self._process(transformer, True)
|
||||
# 2- guess the file type now (will be useful later)
|
||||
apply_transfo('guess_filetype', filetype)
|
||||
if mtree.guess['type'] == 'unknown':
|
||||
return
|
||||
|
||||
log.debug('Found match tree:\n%s' % u(mtree))
|
||||
except TransformerException as e:
|
||||
log.debug('An error has occured in Transformer %s: %s' % (e.transformer, e))
|
||||
# 3- split each of those into explicit groups (separated by parentheses
|
||||
# or square brackets)
|
||||
apply_transfo('split_explicit_groups')
|
||||
|
||||
def _process(self, transformer, post=False):
|
||||
if not hasattr(transformer, 'should_process') or transformer.should_process(self.match_tree, self.options):
|
||||
if post:
|
||||
transformer.post_process(self.match_tree, self.options)
|
||||
else:
|
||||
transformer.process(self.match_tree, self.options)
|
||||
self._transfo_calls.append(transformer)
|
||||
# 4- try to match information for specific patterns
|
||||
# NOTE: order needs to comply to the following:
|
||||
# - website before language (eg: tvu.org.ru vs russian)
|
||||
# - language before episodes_rexps
|
||||
# - properties before language (eg: he-aac vs hebrew)
|
||||
# - release_group before properties (eg: XviD-?? vs xvid)
|
||||
if mtree.guess['type'] in ('episode', 'episodesubtitle', 'episodeinfo'):
|
||||
strategy = [ 'guess_date', 'guess_website', 'guess_release_group',
|
||||
'guess_properties', 'guess_language',
|
||||
'guess_video_rexps',
|
||||
'guess_episodes_rexps', 'guess_weak_episodes_rexps' ]
|
||||
else:
|
||||
strategy = [ 'guess_date', 'guess_website', 'guess_release_group',
|
||||
'guess_properties', 'guess_language',
|
||||
'guess_video_rexps' ]
|
||||
|
||||
@property
|
||||
def second_pass_options(self):
|
||||
second_pass_options = {}
|
||||
for transformer in self._transfo_calls:
|
||||
if hasattr(transformer, 'second_pass_options'):
|
||||
transformer_second_pass_options = transformer.second_pass_options(self.match_tree, self.options)
|
||||
if transformer_second_pass_options:
|
||||
second_pass_options.update(transformer_second_pass_options)
|
||||
if 'nolanguage' in opts:
|
||||
strategy.remove('guess_language')
|
||||
|
||||
return second_pass_options
|
||||
|
||||
def _validate_options(self, options):
|
||||
valid_filetypes = ('subtitle', 'info', 'video',
|
||||
'movie', 'moviesubtitle', 'movieinfo',
|
||||
'episode', 'episodesubtitle', 'episodeinfo')
|
||||
for name in strategy:
|
||||
apply_transfo(name)
|
||||
|
||||
type = options.get('type')
|
||||
if type and type not in valid_filetypes:
|
||||
raise ValueError("filetype needs to be one of %s" % valid_filetypes)
|
||||
# more guessers for both movies and episodes
|
||||
apply_transfo('guess_bonus_features')
|
||||
apply_transfo('guess_year', skip_first_year=('skip_first_year' in opts))
|
||||
|
||||
if 'nocountry' not in opts:
|
||||
apply_transfo('guess_country')
|
||||
|
||||
apply_transfo('guess_idnumber')
|
||||
|
||||
|
||||
# split into '-' separated subgroups (with required separator chars
|
||||
# around the dash)
|
||||
apply_transfo('split_on_dash')
|
||||
|
||||
# 5- try to identify the remaining unknown groups by looking at their
|
||||
# position relative to other known elements
|
||||
if mtree.guess['type'] in ('episode', 'episodesubtitle', 'episodeinfo'):
|
||||
apply_transfo('guess_episode_info_from_position')
|
||||
else:
|
||||
apply_transfo('guess_movie_title_from_position')
|
||||
|
||||
# 6- perform some post-processing steps
|
||||
apply_transfo('post_process')
|
||||
|
||||
log.debug('Found match tree:\n%s' % u(mtree))
|
||||
|
||||
def matched(self):
|
||||
return self.match_tree.matched()
|
||||
|
||||
|
||||
def found_property(node, name, value=None, confidence=1.0, update_guess=True, logger=None):
|
||||
# automatically retrieve the log object from the caller frame
|
||||
if not logger:
|
||||
caller_frame = inspect.stack()[1][0]
|
||||
logger = caller_frame.f_locals['self'].log
|
||||
guess = Guess({name: node.clean_value if value is None else value}, confidence=confidence)
|
||||
return found_guess(node, guess, update_guess=update_guess, logger=logger)
|
||||
|
||||
|
||||
def found_guess(node, guess, update_guess=True, logger=None):
|
||||
if node.guess:
|
||||
if update_guess:
|
||||
node.guess.update_highest_confidence(guess)
|
||||
else:
|
||||
child = node.add_child(guess.metadata().span)
|
||||
child.guess = guess
|
||||
else:
|
||||
node.guess = guess
|
||||
log_found_guess(guess, logger)
|
||||
return node.guess
|
||||
|
||||
|
||||
def log_found_guess(guess, logger=None):
|
||||
for k, v in guess.items():
|
||||
(logger or log).debug('Property found: %s=%s (confidence=%.2f)' % (k, v, guess.confidence(k)))
|
||||
|
||||
|
||||
class GuessFinder(object):
|
||||
def __init__(self, guess_func, confidence=None, logger=None, options=None):
|
||||
self.guess_func = guess_func
|
||||
self.confidence = confidence
|
||||
self.logger = logger or log
|
||||
self.options = options
|
||||
|
||||
def process_nodes(self, nodes):
|
||||
for node in nodes:
|
||||
self.process_node(node)
|
||||
|
||||
def process_node(self, node, iterative=True, partial_span=None):
|
||||
value = None
|
||||
if partial_span:
|
||||
value = node.value[partial_span[0]:partial_span[1]]
|
||||
else:
|
||||
value = node.value
|
||||
string = ' %s ' % value # add sentinels
|
||||
|
||||
if not self.options:
|
||||
matcher_result = self.guess_func(string, node)
|
||||
else:
|
||||
matcher_result = self.guess_func(string, node, self.options)
|
||||
|
||||
if matcher_result:
|
||||
if not isinstance(matcher_result, Guess):
|
||||
result, span = matcher_result
|
||||
else:
|
||||
result, span = matcher_result, matcher_result.metadata().span
|
||||
|
||||
if result:
|
||||
# readjust span to compensate for sentinels
|
||||
span = (span[0] - 1, span[1] - 1)
|
||||
|
||||
# readjust span to compensate for partial_span
|
||||
if partial_span:
|
||||
span = (span[0] + partial_span[0], span[1] + partial_span[0])
|
||||
|
||||
partition_spans = None
|
||||
if self.options and 'skip_nodes' in self.options:
|
||||
skip_nodes = self.options.get('skip_nodes')
|
||||
for skip_node in skip_nodes:
|
||||
if skip_node.parent.node_idx == node.node_idx[:len(skip_node.parent.node_idx)] and\
|
||||
skip_node.span == span:
|
||||
partition_spans = node.get_partition_spans(skip_node.span)
|
||||
partition_spans.remove(skip_node.span)
|
||||
break
|
||||
|
||||
if not partition_spans:
|
||||
# restore sentinels compensation
|
||||
|
||||
guess = None
|
||||
if isinstance(result, Guess):
|
||||
guess = result
|
||||
else:
|
||||
guess = Guess(result, confidence=self.confidence, input=string, span=span)
|
||||
|
||||
if not iterative:
|
||||
node.guess.update(guess)
|
||||
else:
|
||||
absolute_span = (span[0] + node.offset, span[1] + node.offset)
|
||||
node.partition(span)
|
||||
found_child = None
|
||||
for child in node.children:
|
||||
if child.span == absolute_span:
|
||||
found_guess(child, guess, self.logger)
|
||||
found_child = child
|
||||
break
|
||||
for child in node.children:
|
||||
if not child is found_child:
|
||||
self.process_node(child)
|
||||
else:
|
||||
for partition_span in partition_spans:
|
||||
self.process_node(node, partial_span=partition_span)
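For illustration, a minimal sketch (hypothetical helper, not part of this commit) of the contract GuessFinder expects from its guess_func: the string it receives is padded with one-space sentinels, and the returned span must be relative to that padded string.

import re

def guess_year_example(string, node):
    # 'string' arrives wrapped in sentinel spaces, e.g. ' Dark.City.1998.mkv '
    match = re.search(r'(?:19|20)[0-9]{2}', string)
    if match:
        # the span is relative to the sentinel-padded string; process_node()
        # shifts it back by one and re-applies the node offset afterwards
        return {'year': int(match.group(0))}, match.span()
    return None, None

# typical wiring (names taken from this module):
# GuessFinder(guess_year_example, confidence=0.9).process_nodes(mtree.unidentified_leaves())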
|
||||
|
|
|
@@ -2,7 +2,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
|
@@ -18,14 +18,12 @@
|
|||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
import guessit # @UnusedImport needed for doctests
|
||||
from guessit import UnicodeMixin, base_text_type
|
||||
from __future__ import unicode_literals
|
||||
from guessit import UnicodeMixin, base_text_type, Guess
|
||||
from guessit.textutils import clean_string, str_fill
|
||||
from guessit.patterns import group_delimiters
|
||||
from guessit.guess import (merge_similar_guesses, merge_all,
|
||||
choose_int, choose_string, Guess)
|
||||
choose_int, choose_string)
|
||||
import copy
|
||||
import logging
|
||||
|
||||
|
@@ -33,45 +31,8 @@ log = logging.getLogger(__name__)
|
|||
|
||||
|
||||
class BaseMatchTree(UnicodeMixin):
|
||||
"""A BaseMatchTree is a tree covering the filename, where each
|
||||
node represents a substring in the filename and can have a ``Guess``
|
||||
associated with it that contains the information that has been guessed
|
||||
in this node. Nodes can be further split into subnodes until a proper
|
||||
split has been found.
|
||||
|
||||
Each node has the following attributes:
|
||||
- string = the original string of which this node represents a region
|
||||
- span = a pair of (begin, end) indices delimiting the substring
|
||||
- parent = parent node
|
||||
- children = list of children nodes
|
||||
- guess = Guess()
|
||||
|
||||
BaseMatchTrees are displayed in the following way:
|
||||
|
||||
>>> path = 'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv'
|
||||
>>> print(guessit.IterativeMatcher(path).match_tree)
|
||||
000000 1111111111111111 2222222222222222222222222222222222222222222 333
|
||||
000000 0000000000111111 0000000000111111222222222222222222222222222 000
|
||||
011112 011112000011111222222222222222222 000
|
||||
011112222222222222
|
||||
0000011112222
|
||||
01112 0111
|
||||
Movies/__________(____)/Dark.City.(____).DC._____.____.___.____-___.___
|
||||
tttttttttt yyyy yyyy fffff ssss aaa vvvv rrr ccc
|
||||
Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv
|
||||
|
||||
The last line contains the filename, which you can use as a reference.
|
||||
The previous line contains the type of property that has been found.
|
||||
The line before that contains the filename, where all the found groups
|
||||
have been blanked. Basically, what is left on this line are the leftover
|
||||
groups which could not be identified.
|
||||
|
||||
The lines before that indicate the indices of the groups in the tree.
|
||||
|
||||
For instance, the part of the filename 'BDRip' is the leaf with index
|
||||
``(2, 2, 1)`` (read from top to bottom), and its meaning is 'format'
|
||||
(as shown by the ``f``'s on the last-but-one line).
|
||||
"""
|
||||
"""A MatchTree represents the hierarchical split of a string into its
|
||||
constituent semantic groups."""
|
||||
|
||||
def __init__(self, string='', span=None, parent=None):
|
||||
self.string = string
|
||||
|
@@ -82,14 +43,10 @@ class BaseMatchTree(UnicodeMixin):
|
|||
|
||||
@property
|
||||
def value(self):
|
||||
"""Return the substring that this node matches."""
|
||||
return self.string[self.span[0]:self.span[1]]
|
||||
|
||||
@property
|
||||
def clean_value(self):
|
||||
"""Return a cleaned value of the matched substring, with better
|
||||
presentation formatting (punctuation marks removed, duplicate
|
||||
spaces, ...)"""
|
||||
return clean_string(self.value)
|
||||
|
||||
@property
|
||||
|
@@ -98,8 +55,6 @@ class BaseMatchTree(UnicodeMixin):
|
|||
|
||||
@property
|
||||
def info(self):
|
||||
"""Return a dict containing all the info guessed by this node,
|
||||
subnodes included."""
|
||||
result = dict(self.guess)
|
||||
|
||||
for c in self.children:
|
||||
|
@@ -109,7 +64,6 @@ class BaseMatchTree(UnicodeMixin):
|
|||
|
||||
@property
|
||||
def root(self):
|
||||
"""Return the root node of the tree."""
|
||||
if not self.parent:
|
||||
return self
|
||||
|
||||
|
@@ -117,43 +71,28 @@ class BaseMatchTree(UnicodeMixin):
|
|||
|
||||
@property
|
||||
def depth(self):
|
||||
"""Return the depth of this node."""
|
||||
if self.is_leaf():
|
||||
return 0
|
||||
|
||||
return 1 + max(c.depth for c in self.children)
|
||||
|
||||
def is_leaf(self):
|
||||
"""Return whether this node is a leaf or not."""
|
||||
return self.children == []
|
||||
|
||||
def add_child(self, span):
|
||||
"""Add a new child node to this node with the given span."""
|
||||
child = MatchTree(self.string, span=span, parent=self)
|
||||
self.children.append(child)
|
||||
return child
|
||||
|
||||
def get_partition_spans(self, indices):
|
||||
"""Return the list of absolute spans for the regions of the original
|
||||
string defined by splitting this node at the given indices (relative
|
||||
to this node)"""
|
||||
def partition(self, indices):
|
||||
indices = sorted(indices)
|
||||
if indices[0] != 0:
|
||||
indices.insert(0, 0)
|
||||
if indices[-1] != len(self.value):
|
||||
indices.append(len(self.value))
|
||||
|
||||
spans = []
|
||||
for start, end in zip(indices[:-1], indices[1:]):
|
||||
spans.append((self.offset + start,
|
||||
self.offset + end))
|
||||
return spans
|
||||
|
||||
def partition(self, indices):
|
||||
"""Partition this node by splitting it at the given indices,
|
||||
relative to this node."""
|
||||
for partition_span in self.get_partition_spans(indices):
|
||||
self.add_child(span=partition_span)
|
||||
self.add_child(span=(self.offset + start,
|
||||
self.offset + end))
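As a quick illustration of the partitioning logic above (a sketch; get_partition_spans is the newer variant shown in this hunk, partition exists in both versions, and the values are hypothetical):

from guessit.matchtree import MatchTree

tree = MatchTree('Dark.City.(1998).DC.BDRip.720p.mkv')
node = tree.add_child(span=(10, 16))        # covers the substring '(1998)'
print(node.get_partition_spans([1, 5]))     # -> [(10, 11), (11, 15), (15, 16)]
node.partition([1, 5])                      # creates one child node per span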
|
||||
|
||||
def split_on_components(self, components):
|
||||
offset = 0
|
||||
|
@@ -165,7 +104,6 @@ class BaseMatchTree(UnicodeMixin):
|
|||
offset = end
|
||||
|
||||
def nodes_at_depth(self, depth):
|
||||
"""Return all the nodes at a given depth in the tree"""
|
||||
if depth == 0:
|
||||
yield self
|
||||
|
||||
|
@@ -175,32 +113,26 @@ class BaseMatchTree(UnicodeMixin):
|
|||
|
||||
@property
|
||||
def node_idx(self):
|
||||
"""Return this node's index in the tree, as a tuple.
|
||||
If this node is the root of the tree, then return ()."""
|
||||
if self.parent is None:
|
||||
return ()
|
||||
return self.parent.node_idx + (self.parent.children.index(self),)
|
||||
|
||||
def node_at(self, idx):
|
||||
"""Return the node at the given index in the subtree rooted at
|
||||
this node."""
|
||||
if not idx:
|
||||
return self
|
||||
|
||||
try:
|
||||
return self.children[idx[0]].node_at(idx[1:])
|
||||
except IndexError:
|
||||
except:
|
||||
raise ValueError('Non-existent node index: %s' % (idx,))
|
||||
|
||||
def nodes(self):
|
||||
"""Return all the nodes and subnodes in this tree."""
|
||||
yield self
|
||||
for child in self.children:
|
||||
for node in child.nodes():
|
||||
yield node
|
||||
|
||||
def _leaves(self):
|
||||
"""Return a generator over all the nodes that are leaves."""
|
||||
if self.is_leaf():
|
||||
yield self
|
||||
else:
|
||||
|
@@ -209,73 +141,10 @@ class BaseMatchTree(UnicodeMixin):
|
|||
for leaf in child._leaves():
|
||||
yield leaf
|
||||
|
||||
def group_node(self):
|
||||
return self._other_group_node(0)
|
||||
|
||||
def previous_group_node(self):
|
||||
return self._other_group_node(-1)
|
||||
|
||||
def next_group_node(self):
|
||||
return self._other_group_node(+1)
|
||||
|
||||
def _other_group_node(self, offset):
|
||||
if len(self.node_idx) > 1:
|
||||
group_idx = self.node_idx[:2]
|
||||
if group_idx[1] + offset >= 0:
|
||||
other_group_idx = (group_idx[0], group_idx[1] + offset)
|
||||
try:
|
||||
other_group_node = self.root.node_at(other_group_idx)
|
||||
return other_group_node
|
||||
except ValueError:
|
||||
pass
|
||||
return None
|
||||
|
||||
def leaves(self):
|
||||
"""Return a list of all the nodes that are leaves."""
|
||||
return list(self._leaves())
|
||||
|
||||
def previous_leaf(self, leaf):
|
||||
"""Return previous leaf for this node"""
|
||||
return self._other_leaf(leaf, -1)
|
||||
|
||||
def next_leaf(self, leaf):
|
||||
"""Return next leaf for this node"""
|
||||
return self._other_leaf(leaf, +1)
|
||||
|
||||
def _other_leaf(self, leaf, offset):
|
||||
leaves = self.leaves()
|
||||
index = leaves.index(leaf) + offset
|
||||
if index > 0 and index < len(leaves):
|
||||
return leaves[index]
|
||||
return None
|
||||
|
||||
def previous_leaves(self, leaf):
|
||||
"""Return previous leaves for this node"""
|
||||
leaves = self.leaves()
|
||||
index = leaves.index(leaf)
|
||||
if index > 0 and index < len(leaves):
|
||||
previous_leaves = leaves[:index]
|
||||
previous_leaves.reverse()
|
||||
return previous_leaves
|
||||
return []
|
||||
|
||||
def next_leaves(self, leaf):
|
||||
"""Return next leaves for this node"""
|
||||
leaves = self.leaves()
|
||||
index = leaves.index(leaf)
|
||||
if index > 0 and index < len(leaves):
|
||||
return leaves[index + 1:len(leaves)]
|
||||
return []
|
||||
|
||||
def to_string(self):
|
||||
"""Return a readable string representation of this tree.
|
||||
|
||||
The result is a multi-line string, where the lines are:
|
||||
- line 1 -> N-2: each line contains the nodes at the given depth in the tree
|
||||
- line N-2: original string where all the found groups have been blanked
|
||||
- line N-1: type of property that has been found
|
||||
- line N: the original string, which you can use as a reference.
|
||||
"""
|
||||
empty_line = ' ' * len(self.string)
|
||||
|
||||
def to_hex(x):
|
||||
|
@@ -284,27 +153,23 @@ class BaseMatchTree(UnicodeMixin):
|
|||
return x
|
||||
|
||||
def meaning(result):
|
||||
mmap = {'episodeNumber': 'E',
|
||||
'season': 'S',
|
||||
'extension': 'e',
|
||||
'format': 'f',
|
||||
'language': 'l',
|
||||
'country': 'C',
|
||||
'videoCodec': 'v',
|
||||
'videoProfile': 'v',
|
||||
'audioCodec': 'a',
|
||||
'audioProfile': 'a',
|
||||
'audioChannels': 'a',
|
||||
'website': 'w',
|
||||
'container': 'c',
|
||||
'series': 'T',
|
||||
'title': 't',
|
||||
'date': 'd',
|
||||
'year': 'y',
|
||||
'releaseGroup': 'r',
|
||||
'screenSize': 's',
|
||||
'other': 'o'
|
||||
}
|
||||
mmap = { 'episodeNumber': 'E',
|
||||
'season': 'S',
|
||||
'extension': 'e',
|
||||
'format': 'f',
|
||||
'language': 'l',
|
||||
'country': 'C',
|
||||
'videoCodec': 'v',
|
||||
'audioCodec': 'a',
|
||||
'website': 'w',
|
||||
'container': 'c',
|
||||
'series': 'T',
|
||||
'title': 't',
|
||||
'date': 'd',
|
||||
'year': 'y',
|
||||
'releaseGroup': 'r',
|
||||
'screenSize': 's'
|
||||
}
|
||||
|
||||
if result is None:
|
||||
return ' '
|
||||
|
@@ -315,7 +180,7 @@ class BaseMatchTree(UnicodeMixin):
|
|||
|
||||
return 'x'
|
||||
|
||||
lines = [empty_line] * (self.depth + 2) # +2: remaining, meaning
|
||||
lines = [ empty_line ] * (self.depth + 2) # +2: remaining, meaning
|
||||
lines[-2] = self.string
|
||||
|
||||
for node in self.nodes():
|
||||
|
@@ -333,22 +198,16 @@ class BaseMatchTree(UnicodeMixin):
|
|||
|
||||
lines.append(self.string)
|
||||
|
||||
return '\n'.join(l.rstrip() for l in lines)
|
||||
return '\n'.join(lines)
|
||||
|
||||
def __unicode__(self):
|
||||
return self.to_string()
|
||||
|
||||
def __repr__(self):
|
||||
return '<MatchTree: root=%s>' % self.value
|
||||
|
||||
|
||||
class MatchTree(BaseMatchTree):
|
||||
"""The MatchTree contains a few "utility" methods which are not necessary
|
||||
for the BaseMatchTree, but add a lot of convenience for writing
|
||||
higher-level rules.
|
||||
"""
|
||||
|
||||
_matched_result = None
|
||||
higher-level rules."""
|
||||
|
||||
def _unidentified_leaves(self,
|
||||
valid=lambda leaf: len(leaf.clean_value) >= 2):
|
||||
|
@@ -358,12 +217,11 @@ class MatchTree(BaseMatchTree):
|
|||
|
||||
def unidentified_leaves(self,
|
||||
valid=lambda leaf: len(leaf.clean_value) >= 2):
|
||||
"""Return a list of leaves that are not empty."""
|
||||
return list(self._unidentified_leaves(valid))
|
||||
|
||||
def _leaves_containing(self, property_name):
|
||||
if isinstance(property_name, base_text_type):
|
||||
property_name = [property_name]
|
||||
property_name = [ property_name ]
|
||||
|
||||
for leaf in self._leaves():
|
||||
for prop in property_name:
|
||||
|
@@ -372,11 +230,9 @@ class MatchTree(BaseMatchTree):
|
|||
break
|
||||
|
||||
def leaves_containing(self, property_name):
|
||||
"""Return a list of leaves that guessed the given property."""
|
||||
return list(self._leaves_containing(property_name))
|
||||
|
||||
def first_leaf_containing(self, property_name):
|
||||
"""Return the first leaf containing the given property."""
|
||||
try:
|
||||
return next(self._leaves_containing(property_name))
|
||||
except StopIteration:
|
||||
|
@@ -389,8 +245,6 @@ class MatchTree(BaseMatchTree):
|
|||
yield leaf
|
||||
|
||||
def previous_unidentified_leaves(self, node):
|
||||
"""Return a list of non-empty leaves that are before the given
|
||||
node (in the string)."""
|
||||
return list(self._previous_unidentified_leaves(node))
|
||||
|
||||
def _previous_leaves_containing(self, node, property_name):
|
||||
|
@@ -400,8 +254,6 @@ class MatchTree(BaseMatchTree):
|
|||
yield leaf
|
||||
|
||||
def previous_leaves_containing(self, node, property_name):
|
||||
"""Return a list of leaves containing the given property that are
|
||||
before the given node (in the string)."""
|
||||
return list(self._previous_leaves_containing(node, property_name))
|
||||
|
||||
def is_explicit(self):
|
||||
|
@@ -410,30 +262,26 @@ class MatchTree(BaseMatchTree):
|
|||
return (self.value[0] + self.value[-1]) in group_delimiters
|
||||
|
||||
def matched(self):
|
||||
"""Return a single guess that contains all the info found in the
|
||||
nodes of this tree, trying to merge properties as well as possible.
|
||||
"""
|
||||
if not self._matched_result:
|
||||
# we need to make a copy here, as the merge functions work in place and
|
||||
# calling them on the match tree would modify it
|
||||
parts = [copy.copy(node.guess) for node in self.nodes() if node.guess]
|
||||
# we need to make a copy here, as the merge functions work in place and
|
||||
# calling them on the match tree would modify it
|
||||
parts = [node.guess for node in self.nodes() if node.guess]
|
||||
parts = copy.deepcopy(parts)
|
||||
|
||||
# 1- try to merge similar information together and give it a higher
|
||||
# confidence
|
||||
for int_part in ('year', 'season', 'episodeNumber'):
|
||||
merge_similar_guesses(parts, int_part, choose_int)
|
||||
# 1- try to merge similar information together and give it a higher
|
||||
# confidence
|
||||
for int_part in ('year', 'season', 'episodeNumber'):
|
||||
merge_similar_guesses(parts, int_part, choose_int)
|
||||
|
||||
for string_part in ('title', 'series', 'container', 'format',
|
||||
'releaseGroup', 'website', 'audioCodec',
|
||||
'videoCodec', 'screenSize', 'episodeFormat',
|
||||
'audioChannels', 'idNumber'):
|
||||
merge_similar_guesses(parts, string_part, choose_string)
|
||||
for string_part in ('title', 'series', 'container', 'format',
|
||||
'releaseGroup', 'website', 'audioCodec',
|
||||
'videoCodec', 'screenSize', 'episodeFormat',
|
||||
'audioChannels', 'idNumber'):
|
||||
merge_similar_guesses(parts, string_part, choose_string)
|
||||
|
||||
# 2- merge the rest, potentially discarding information not properly
|
||||
# merged before
|
||||
result = merge_all(parts,
|
||||
append=['language', 'subtitleLanguage', 'other', 'special'])
|
||||
# 2- merge the rest, potentially discarding information not properly
|
||||
# merged before
|
||||
result = merge_all(parts,
|
||||
append=['language', 'subtitleLanguage', 'other'])
|
||||
|
||||
log.debug('Final result: ' + result.nice_string())
|
||||
self._matched_result = result
|
||||
return self._matched_result
|
||||
log.debug('Final result: ' + result.nice_string())
|
||||
return result
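A rough sketch of what the two merging steps above do, using hand-built Guess objects (hypothetical values; the helpers are the ones imported at the top of this module):

parts = [Guess({'season': 2}, confidence=0.5),
         Guess({'season': 2}, confidence=0.7),
         Guess({'title': 'Dark City'}, confidence=0.6)]

# step 1: collapse agreeing guesses for the same property, raising confidence
merge_similar_guesses(parts, 'season', choose_int)

# step 2: merge everything into a single Guess, appending list-like properties
result = merge_all(parts, append=['language', 'subtitleLanguage', 'other'])
print(result.nice_string())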
|
||||
|
|
|
@@ -1,25 +0,0 @@
|
|||
from optparse import OptionParser
|
||||
|
||||
option_parser = OptionParser(usage='usage: %prog [options] file1 [file2...]')
|
||||
option_parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
|
||||
help='display debug output')
|
||||
option_parser.add_option('-p', '--properties', dest='properties', action='store_true', default=False,
|
||||
help='Display properties that can be guessed.')
|
||||
option_parser.add_option('-l', '--values', dest='values', action='store_true', default=False,
|
||||
help='Display property values that can be guessed.')
|
||||
option_parser.add_option('-s', '--transformers', dest='transformers', action='store_true', default=False,
|
||||
help='Display transformers that can be used.')
|
||||
option_parser.add_option('-i', '--info', dest='info', default='filename',
|
||||
help='the desired information type: filename, hash_mpc or a hash from python\'s '
|
||||
'hashlib module, such as hash_md5, hash_sha1, ...; or a list of any of '
|
||||
'them, comma-separated')
|
||||
option_parser.add_option('-n', '--name-only', dest='name_only', action='store_true', default=False,
|
||||
help='Parse files as name only. Disable folder parsing, extension parsing, and file content analysis.')
|
||||
option_parser.add_option('-t', '--type', dest='type', default=None,
|
||||
help='the suggested file type: movie, episode. If undefined, type will be guessed.')
|
||||
option_parser.add_option('-a', '--advanced', dest='advanced', action='store_true', default=False,
|
||||
help='display advanced information for filename guesses, as json output')
|
||||
option_parser.add_option('-y', '--yaml', dest='yaml', action='store_true', default=False,
|
||||
help='display information for filename guesses as yaml output (like unit-test)')
|
||||
option_parser.add_option('-d', '--demo', action='store_true', dest='demo', default=False,
|
||||
help='run a few builtin tests instead of analyzing a file')
|
250 lib/guessit/patterns.py (Normal file)
|
@@ -0,0 +1,250 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
|
||||
# Copyright (c) 2011 Ricard Marxer <ricardmp@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
|
||||
subtitle_exts = [ 'srt', 'idx', 'sub', 'ssa' ]
|
||||
|
||||
info_exts = [ 'nfo' ]
|
||||
|
||||
video_exts = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2',
|
||||
'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm',
|
||||
'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv']
|
||||
|
||||
group_delimiters = [ '()', '[]', '{}' ]
|
||||
|
||||
# separator character regexp
|
||||
sep = r'[][,)(}{+ /\._-]' # regexp art, hehe :D
|
||||
|
||||
# character used to represent a deleted char (when matching groups)
|
||||
deleted = '_'
|
||||
|
||||
# format: [ (regexp, confidence, span_adjust) ]
|
||||
episode_rexps = [ # ... Season 2 ...
|
||||
(r'season (?P<season>[0-9]+)', 1.0, (0, 0)),
|
||||
(r'saison (?P<season>[0-9]+)', 1.0, (0, 0)),
|
||||
|
||||
# ... s02e13 ...
|
||||
(r'[Ss](?P<season>[0-9]{1,3})[^0-9]?(?P<episodeNumber>(?:-?[eE-][0-9]{1,3})+)[^0-9]', 1.0, (0, -1)),
|
||||
|
||||
# ... s03-x02 ... # FIXME: redundant? remove it?
|
||||
#(r'[Ss](?P<season>[0-9]{1,3})[^0-9]?(?P<bonusNumber>(?:-?[xX-][0-9]{1,3})+)[^0-9]', 1.0, (0, -1)),
|
||||
|
||||
# ... 2x13 ...
|
||||
(r'[^0-9](?P<season>[0-9]{1,2})[^0-9 .-]?(?P<episodeNumber>(?:-?[xX][0-9]{1,3})+)[^0-9]', 1.0, (1, -1)),
|
||||
|
||||
# ... s02 ...
|
||||
#(sep + r's(?P<season>[0-9]{1,2})' + sep, 0.6, (1, -1)),
|
||||
(r's(?P<season>[0-9]{1,2})[^0-9]', 0.6, (0, -1)),
|
||||
|
||||
# v2 or v3 for some manga releases which have multiple rips
|
||||
(r'(?P<episodeNumber>[0-9]{1,3})v[23]' + sep, 0.6, (0, 0)),
|
||||
|
||||
# ... ep 23 ...
|
||||
('ep' + sep + r'(?P<episodeNumber>[0-9]{1,2})[^0-9]', 0.7, (0, -1)),
|
||||
|
||||
# ... e13 ... for a mini-series without a season number
|
||||
(sep + r'e(?P<episodeNumber>[0-9]{1,2})' + sep, 0.6, (1, -1))
|
||||
|
||||
]
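To illustrate how these entries are meant to be consumed (a sketch, not part of the module): each tuple carries a pattern, a confidence, and a span adjustment that trims the separator characters the pattern had to consume.

import re

# the '... s02e13 ...' rule from the list above
pattern, confidence, span_adjust = \
    (r'[Ss](?P<season>[0-9]{1,3})[^0-9]?(?P<episodeNumber>(?:-?[eE-][0-9]{1,3})+)[^0-9]',
     1.0, (0, -1))

match = re.search(pattern, ' Dexter.s02e13.720p.mkv ')
print(match.groupdict())    # {'season': '02', 'episodeNumber': 'e13'}
start, end = match.span()
span = (start + span_adjust[0], end + span_adjust[1])   # drops the trailing '.'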
|
||||
|
||||
|
||||
weak_episode_rexps = [ # ... 213 or 0106 ...
|
||||
(sep + r'(?P<episodeNumber>[0-9]{2,4})' + sep, (1, -1))
|
||||
]
|
||||
|
||||
non_episode_title = [ 'extras', 'rip' ]
|
||||
|
||||
|
||||
video_rexps = [ # cd number
|
||||
(r'cd ?(?P<cdNumber>[0-9])( ?of ?(?P<cdNumberTotal>[0-9]))?', 1.0, (0, 0)),
|
||||
(r'(?P<cdNumberTotal>[1-9]) cds?', 0.9, (0, 0)),
|
||||
|
||||
# special editions
|
||||
(r'edition' + sep + r'(?P<edition>collector)', 1.0, (0, 0)),
|
||||
(r'(?P<edition>collector)' + sep + 'edition', 1.0, (0, 0)),
|
||||
(r'(?P<edition>special)' + sep + 'edition', 1.0, (0, 0)),
|
||||
(r'(?P<edition>criterion)' + sep + 'edition', 1.0, (0, 0)),
|
||||
|
||||
# director's cut
|
||||
(r"(?P<edition>director'?s?" + sep + "cut)", 1.0, (0, 0)),
|
||||
|
||||
# video size
|
||||
(r'(?P<width>[0-9]{3,4})x(?P<height>[0-9]{3,4})', 0.9, (0, 0)),
|
||||
|
||||
# website
|
||||
(r'(?P<website>www(\.[a-zA-Z0-9]+){2,3})', 0.8, (0, 0)),
|
||||
|
||||
# bonusNumber: ... x01 ...
|
||||
(r'x(?P<bonusNumber>[0-9]{1,2})', 1.0, (0, 0)),
|
||||
|
||||
# filmNumber: ... f01 ...
|
||||
(r'f(?P<filmNumber>[0-9]{1,2})', 1.0, (0, 0))
|
||||
]
|
||||
|
||||
websites = [ 'tvu.org.ru', 'emule-island.com', 'UsaBit.com', 'www.divx-overnet.com',
|
||||
'sharethefiles.com' ]
|
||||
|
||||
unlikely_series = [ 'series' ]
|
||||
|
||||
|
||||
# prop_multi is a dict of { property_name: { canonical_form: [ pattern ] } }
|
||||
# pattern is a string considered as a regexp, with the addition that dashes are
|
||||
# replaced with '[-. _]?' which matches more types of separators (or none)
|
||||
# note: simpler patterns need to be at the end of the list to not shadow more
|
||||
# complete ones, eg: 'AAC' needs to come after 'He-AAC'
|
||||
# ie: from most specific to less specific
|
||||
prop_multi = { 'format': { 'DVD': [ 'DVD', 'DVD-Rip', 'VIDEO-TS', 'DVDivX' ],
|
||||
'HD-DVD': [ 'HD-(?:DVD)?-Rip', 'HD-DVD' ],
|
||||
'BluRay': [ 'Blu-ray', 'B[DR]Rip' ],
|
||||
'HDTV': [ 'HD-TV' ],
|
||||
'DVB': [ 'DVB-Rip', 'DVB', 'PD-TV' ],
|
||||
'WEBRip': [ 'WEB-Rip' ],
|
||||
'Screener': [ 'DVD-SCR', 'Screener' ],
|
||||
'VHS': [ 'VHS' ],
|
||||
'WEB-DL': [ 'WEB-DL' ] },
|
||||
|
||||
'is3D': { True: [ '3D' ] },
|
||||
|
||||
'screenSize': { '480p': [ '480[pi]?' ],
|
||||
'720p': [ '720[pi]?' ],
|
||||
'1080i': [ '1080i' ],
|
||||
'1080p': [ '1080p', '1080[^i]' ] },
|
||||
|
||||
'videoCodec': { 'XviD': [ 'Xvid' ],
|
||||
'DivX': [ 'DVDivX', 'DivX' ],
|
||||
'h264': [ '[hx]-264' ],
|
||||
'Rv10': [ 'Rv10' ],
|
||||
'Mpeg2': [ 'Mpeg2' ] },
|
||||
|
||||
# has nothing to do here (or on filenames for that matter), but some
|
||||
# releases use it and it helps to identify release groups, so we adapt
|
||||
'videoApi': { 'DXVA': [ 'DXVA' ] },
|
||||
|
||||
'audioCodec': { 'AC3': [ 'AC3' ],
|
||||
'DTS': [ 'DTS' ],
|
||||
'AAC': [ 'He-AAC', 'AAC-He', 'AAC' ] },
|
||||
|
||||
'audioChannels': { '5.1': [ r'5\.1', 'DD5[._ ]1', '5ch' ] },
|
||||
|
||||
'episodeFormat': { 'Minisode': [ 'Minisodes?' ] }
|
||||
|
||||
}
|
||||
|
||||
# prop_single dict of { property_name: [ canonical_form ] }
|
||||
prop_single = { 'releaseGroup': [ 'ESiR', 'WAF', 'SEPTiC', r'\[XCT\]', 'iNT', 'PUKKA',
|
||||
'CHD', 'ViTE', 'TLF', 'FLAiTE',
|
||||
'MDX', 'GM4F', 'DVL', 'SVD', 'iLUMiNADOS',
|
||||
'aXXo', 'KLAXXON', 'NoTV', 'ZeaL', 'LOL',
|
||||
'CtrlHD', 'POD', 'WiKi','IMMERSE', 'FQM',
|
||||
'2HD', 'CTU', 'HALCYON', 'EbP', 'SiTV',
|
||||
'HDBRiSe', 'AlFleNi-TeaM', 'EVOLVE', '0TV',
|
||||
'TLA', 'NTB', 'ASAP', 'MOMENTUM', 'FoV', 'D-Z0N3',
|
||||
'TrollHD', 'ECI'
|
||||
],
|
||||
|
||||
# potentially confusing release group names (they are words)
|
||||
'weakReleaseGroup': [ 'DEiTY', 'FiNaLe', 'UnSeeN', 'KiNGS', 'CLUE', 'DIMENSION',
|
||||
'SAiNTS', 'ARROW', 'EuReKA', 'SiNNERS', 'DiRTY', 'REWARD',
|
||||
'REPTiLE',
|
||||
],
|
||||
|
||||
'other': [ 'PROPER', 'REPACK', 'LIMITED', 'DualAudio', 'Audiofixed', 'R5',
|
||||
'complete', 'classic', # not so sure about these ones, could appear in a title
|
||||
'ws' ] # widescreen
|
||||
}
|
||||
|
||||
_dash = '-'
|
||||
_psep = '[-. _]?'
|
||||
|
||||
def _to_rexp(prop):
|
||||
return re.compile(prop.replace(_dash, _psep), re.IGNORECASE)
|
||||
|
||||
# properties_rexps dict of { property_name: { canonical_form: [ rexp ] } }
|
||||
# containing the rexps compiled from both prop_multi and prop_single
|
||||
properties_rexps = dict((type, dict((canonical_form,
|
||||
[ _to_rexp(pattern) for pattern in patterns ])
|
||||
for canonical_form, patterns in props.items()))
|
||||
for type, props in prop_multi.items())
|
||||
|
||||
properties_rexps.update(dict((type, dict((canonical_form, [ _to_rexp(canonical_form) ])
|
||||
for canonical_form in props))
|
||||
for type, props in prop_single.items()))
|
||||
|
||||
|
||||
|
||||
def find_properties(string):
|
||||
result = []
|
||||
for property_name, props in properties_rexps.items():
|
||||
# FIXME: this should be done in a more flexible way...
|
||||
if property_name in ['weakReleaseGroup']:
|
||||
continue
|
||||
|
||||
for canonical_form, rexps in props.items():
|
||||
for value_rexp in rexps:
|
||||
match = value_rexp.search(string)
|
||||
if match:
|
||||
start, end = match.span()
|
||||
# make sure our word is always surrounded by separators
|
||||
# note: sep is a regexp, but in this case using it as
|
||||
# a char sequence achieves the same goal
|
||||
if ((start > 0 and string[start-1] not in sep) or
|
||||
(end < len(string) and string[end] not in sep)):
|
||||
continue
|
||||
|
||||
result.append((property_name, canonical_form, start, end))
|
||||
return result
|
||||
|
||||
|
||||
property_synonyms = { 'Special Edition': [ 'Special' ],
|
||||
'Collector Edition': [ 'Collector' ],
|
||||
'Criterion Edition': [ 'Criterion' ]
|
||||
}
|
||||
|
||||
|
||||
def revert_synonyms():
|
||||
reverse = {}
|
||||
|
||||
for canonical, synonyms in property_synonyms.items():
|
||||
for synonym in synonyms:
|
||||
reverse[synonym.lower()] = canonical
|
||||
|
||||
return reverse
|
||||
|
||||
|
||||
reverse_synonyms = revert_synonyms()
|
||||
|
||||
|
||||
def canonical_form(string):
|
||||
return reverse_synonyms.get(string.lower(), string)
|
||||
|
||||
|
||||
def compute_canonical_form(property_name, value):
|
||||
"""Return the canonical form of a property given its type if it is a valid
|
||||
one, None otherwise."""
|
||||
if isinstance(value, basestring):
|
||||
for canonical_form, rexps in properties_rexps[property_name].items():
|
||||
for rexp in rexps:
|
||||
if rexp.match(value):
|
||||
return canonical_form
|
||||
return None
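A short usage sketch of the helpers defined above (hypothetical filename):

filename = 'Dark.City.(1998).DC.BDRip.720p.DTS.x264-CHD.mkv'

for prop, canonical, start, end in find_properties(filename):
    print('%s: %s (%r)' % (prop, canonical, filename[start:end]))
# hits should include ('format', 'BluRay', 'BDRip'), ('screenSize', '720p', '720p'),
# ('audioCodec', 'DTS', 'DTS'), ('videoCodec', 'h264', 'x264') and
# ('releaseGroup', 'CHD', 'CHD')

print(compute_canonical_form('videoCodec', 'xvid'))     # -> 'XviD'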
|
|
@@ -1,77 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from guessit import base_text_type
|
||||
|
||||
group_delimiters = ['()', '[]', '{}']
|
||||
|
||||
# separator character regexp
|
||||
sep = r'[][,)(}:{+ /\._-]' # regexp art, hehe :D
|
||||
|
||||
_dash = '-'
|
||||
_psep = '[\W_]?'
|
||||
|
||||
|
||||
def build_or_pattern(patterns):
|
||||
"""Build a or pattern string from a list of possible patterns
|
||||
"""
|
||||
or_pattern = ''
|
||||
for pattern in patterns:
|
||||
if not or_pattern:
|
||||
or_pattern += '(?:'
|
||||
else:
|
||||
or_pattern += '|'
|
||||
or_pattern += ('(?:%s)' % pattern)
|
||||
or_pattern += ')'
|
||||
return or_pattern
|
||||
|
||||
|
||||
def compile_pattern(pattern, enhance=True):
|
||||
"""Compile and enhance a pattern
|
||||
|
||||
:param pattern: Pattern to compile (regexp).
|
||||
:type pattern: string
|
||||
|
||||
:param enhance: whether to enhance the pattern before compiling.
:type enhance: bool
|
||||
|
||||
:return: The compiled pattern
|
||||
:rtype: regular expression object
|
||||
"""
|
||||
return re.compile(enhance_pattern(pattern) if enhance else pattern, re.IGNORECASE)
|
||||
|
||||
|
||||
def enhance_pattern(pattern):
|
||||
"""Enhance pattern to match more equivalent values.
|
||||
|
||||
'-' is replaced by '[\W_]?', which matches more types of separators (or none)
|
||||
|
||||
:param pattern: Pattern to enhance (regexp).
|
||||
:type pattern: string
|
||||
|
||||
:return: The enhanced pattern
|
||||
:rtype: string
|
||||
"""
|
||||
return pattern.replace(_dash, _psep)
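A quick sketch of these helpers in action (hypothetical values):

print(build_or_pattern(['HD-DVD', 'BluRay']))
# -> '(?:(?:HD-DVD)|(?:BluRay))'

rexp = compile_pattern('Blu-ray')   # the '-' becomes '[\W_]?' before compiling
print(bool(rexp.search('Some.Movie.2013.BLU_RAY.x264')))   # -> True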
|
|
@@ -1,32 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
|
||||
# Copyright (c) 2011 Ricard Marxer <ricardmp@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
subtitle_exts = ['srt', 'idx', 'sub', 'ssa']
|
||||
|
||||
info_exts = ['nfo']
|
||||
|
||||
video_exts = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2',
|
||||
'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm',
|
||||
'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv',
|
||||
'iso']
|
|
@@ -1,150 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
digital_numeral = '\d{1,3}'
|
||||
|
||||
roman_numeral = "(?=[MCDLXVI]+)M{0,4}(?:CM|CD|D?C{0,3})(?:XC|XL|L?X{0,3})(?:IX|IV|V?I{0,3})"
|
||||
|
||||
english_word_numeral_list = [
|
||||
'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten',
|
||||
'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen', 'twenty'
|
||||
]
|
||||
|
||||
french_word_numeral_list = [
|
||||
'zéro', 'un', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
|
||||
'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dix-sept', 'dix-huit', 'dix-neuf', 'vingt'
|
||||
]
|
||||
|
||||
french_alt_word_numeral_list = [
|
||||
'zero', 'une', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
|
||||
'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dixsept', 'dixhuit', 'dixneuf', 'vingt'
|
||||
]
|
||||
|
||||
|
||||
def __build_word_numeral(*args, **kwargs):
|
||||
re = None
|
||||
for word_list in args:
|
||||
for word in word_list:
|
||||
if not re:
|
||||
re = '(?:(?=\w+)'
|
||||
else:
|
||||
re += '|'
|
||||
re += word
|
||||
re += ')'
|
||||
return re
|
||||
|
||||
word_numeral = __build_word_numeral(english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list)
|
||||
|
||||
numeral = '(?:' + digital_numeral + '|' + roman_numeral + '|' + word_numeral + ')'
|
||||
|
||||
__romanNumeralMap = (
|
||||
('M', 1000),
|
||||
('CM', 900),
|
||||
('D', 500),
|
||||
('CD', 400),
|
||||
('C', 100),
|
||||
('XC', 90),
|
||||
('L', 50),
|
||||
('XL', 40),
|
||||
('X', 10),
|
||||
('IX', 9),
|
||||
('V', 5),
|
||||
('IV', 4),
|
||||
('I', 1)
|
||||
)
|
||||
|
||||
__romanNumeralPattern = re.compile('^' + roman_numeral + '$')
|
||||
|
||||
|
||||
def __parse_roman(value):
|
||||
"""convert Roman numeral to integer"""
|
||||
if not __romanNumeralPattern.search(value):
|
||||
raise ValueError('Invalid Roman numeral: %s' % value)
|
||||
|
||||
result = 0
|
||||
index = 0
|
||||
for numeral, integer in __romanNumeralMap:
|
||||
while value[index:index + len(numeral)] == numeral:
|
||||
result += integer
|
||||
index += len(numeral)
|
||||
return result
|
||||
|
||||
|
||||
def __parse_word(value):
|
||||
"""Convert Word numeral to integer"""
|
||||
for word_list in [english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list]:
|
||||
try:
|
||||
return word_list.index(value)
|
||||
except ValueError:
|
||||
pass
|
||||
raise ValueError
|
||||
|
||||
|
||||
_clean_re = re.compile('[^\d]*(\d+)[^\d]*')
|
||||
|
||||
|
||||
def parse_numeral(value, int_enabled=True, roman_enabled=True, word_enabled=True, clean=True):
|
||||
"""Parse a numeric value into integer.
|
||||
|
||||
The input can be an integer given as a string, a Roman numeral, or a number word.
|
||||
|
||||
:param value: Value to parse. Can be an integer, roman numeral or word.
|
||||
:type value: string
|
||||
|
||||
:return: Numeric value; raises ValueError if the value cannot be parsed
|
||||
:rtype: int
|
||||
"""
|
||||
if int_enabled:
|
||||
try:
|
||||
if clean:
|
||||
match = _clean_re.match(value)
|
||||
if match:
|
||||
clean_value = match.group(1)
|
||||
return int(clean_value)
|
||||
return int(value)
|
||||
except ValueError:
|
||||
pass
|
||||
if roman_enabled:
|
||||
try:
|
||||
if clean:
|
||||
for word in value.split():
|
||||
try:
|
||||
return __parse_roman(word)
|
||||
except ValueError:
|
||||
pass
|
||||
return __parse_roman(value)
|
||||
except ValueError:
|
||||
pass
|
||||
if word_enabled:
|
||||
try:
|
||||
if clean:
|
||||
for word in value.split():
|
||||
try:
|
||||
return __parse_word(word)
|
||||
except ValueError:
|
||||
pass
|
||||
return __parse_word(value)
|
||||
except ValueError:
|
||||
pass
|
||||
raise ValueError('Invalid numeral: ' + value)
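A few usage examples of parse_numeral as defined above (a sketch; the values are hypothetical):

print(parse_numeral('13'))        # -> 13
print(parse_numeral('XIV'))       # -> 14  (Roman numeral)
print(parse_numeral('twelve'))    # -> 12  (word numeral)
print(parse_numeral('Book II'))   # -> 2   (clean=True tries each word in turn)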
|
|
@@ -1,21 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
@@ -1,186 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from stevedore import ExtensionManager
|
||||
from pkg_resources import EntryPoint
|
||||
|
||||
from stevedore.extension import Extension
|
||||
from logging import getLogger
|
||||
|
||||
log = getLogger(__name__)
|
||||
|
||||
|
||||
class Transformer(object): # pragma: no cover
|
||||
def __init__(self, priority=0):
|
||||
self.priority = priority
|
||||
self.log = getLogger(self.name)
|
||||
|
||||
@property
|
||||
def name(self):
|
||||
return self.__class__.__name__
|
||||
|
||||
def supported_properties(self):
|
||||
return {}
|
||||
|
||||
def second_pass_options(self, mtree, options=None):
|
||||
return None
|
||||
|
||||
def should_process(self, mtree, options=None):
|
||||
return True
|
||||
|
||||
def process(self, mtree, options=None):
|
||||
pass
|
||||
|
||||
def post_process(self, mtree, options=None):
|
||||
pass
|
||||
|
||||
def rate_quality(self, guess, *props):
|
||||
return 0
|
||||
|
||||
|
||||
class CustomTransformerExtensionManager(ExtensionManager):
|
||||
def __init__(self, namespace='guessit.transformer', invoke_on_load=True,
|
||||
invoke_args=(), invoke_kwds={}, propagate_map_exceptions=True, on_load_failure_callback=None,
|
||||
verify_requirements=False):
|
||||
super(CustomTransformerExtensionManager, self).__init__(namespace=namespace,
|
||||
invoke_on_load=invoke_on_load,
|
||||
invoke_args=invoke_args,
|
||||
invoke_kwds=invoke_kwds,
|
||||
propagate_map_exceptions=propagate_map_exceptions,
|
||||
on_load_failure_callback=on_load_failure_callback,
|
||||
verify_requirements=verify_requirements)
|
||||
|
||||
def order_extensions(self, extensions):
|
||||
"""Order the loaded transformers
|
||||
|
||||
It should follow these rules:
|
||||
- website before language (eg: tvu.org.ru vs russian)
|
||||
- language before episodes_rexps
|
||||
- properties before language (eg: he-aac vs hebrew)
|
||||
- release_group before properties (eg: XviD-?? vs xvid)
|
||||
"""
|
||||
extensions.sort(key=lambda ext: -ext.obj.priority)
|
||||
return extensions
|
||||
|
||||
def _load_one_plugin(self, ep, invoke_on_load, invoke_args, invoke_kwds, verify_requirements):
|
||||
if not ep.dist:
|
||||
plugin = ep.load(require=False)
|
||||
else:
|
||||
plugin = ep.load(require=verify_requirements)
|
||||
if invoke_on_load:
|
||||
obj = plugin(*invoke_args, **invoke_kwds)
|
||||
else:
|
||||
obj = None
|
||||
return Extension(ep.name, ep, plugin, obj)
|
||||
|
||||
def _load_plugins(self, invoke_on_load, invoke_args, invoke_kwds, verify_requirements):
|
||||
return self.order_extensions(super(CustomTransformerExtensionManager, self)._load_plugins(invoke_on_load, invoke_args, invoke_kwds, verify_requirements))
|
||||
|
||||
def objects(self):
|
||||
return self.map(self._get_obj)
|
||||
|
||||
def _get_obj(self, ext):
|
||||
return ext.obj
|
||||
|
||||
def object(self, name):
|
||||
try:
|
||||
return self[name].obj
|
||||
except KeyError:
|
||||
return None
|
||||
|
||||
def register_module(self, name, module_name):
|
||||
ep = EntryPoint(name, module_name)
|
||||
loaded = self._load_one_plugin(ep, invoke_on_load=True, invoke_args=(), invoke_kwds={})
|
||||
if loaded:
|
||||
self.extensions.append(loaded)
|
||||
self.extensions = self.order_extensions(self.extensions)
|
||||
self._extensions_by_name = None
|
||||
|
||||
|
||||
class DefaultTransformerExtensionManager(CustomTransformerExtensionManager):
|
||||
@property
|
||||
def _internal_entry_points(self):
|
||||
return ['split_path_components = guessit.transfo.split_path_components:SplitPathComponents',
|
||||
'guess_filetype = guessit.transfo.guess_filetype:GuessFiletype',
|
||||
'split_explicit_groups = guessit.transfo.split_explicit_groups:SplitExplicitGroups',
|
||||
'guess_date = guessit.transfo.guess_date:GuessDate',
|
||||
'guess_website = guessit.transfo.guess_website:GuessWebsite',
|
||||
'guess_release_group = guessit.transfo.guess_release_group:GuessReleaseGroup',
|
||||
'guess_properties = guessit.transfo.guess_properties:GuessProperties',
|
||||
'guess_language = guessit.transfo.guess_language:GuessLanguage',
|
||||
'guess_video_rexps = guessit.transfo.guess_video_rexps:GuessVideoRexps',
|
||||
'guess_episodes_rexps = guessit.transfo.guess_episodes_rexps:GuessEpisodesRexps',
|
||||
'guess_weak_episodes_rexps = guessit.transfo.guess_weak_episodes_rexps:GuessWeakEpisodesRexps',
|
||||
'guess_bonus_features = guessit.transfo.guess_bonus_features:GuessBonusFeatures',
|
||||
'guess_year = guessit.transfo.guess_year:GuessYear',
|
||||
'guess_country = guessit.transfo.guess_country:GuessCountry',
|
||||
'guess_idnumber = guessit.transfo.guess_idnumber:GuessIdnumber',
|
||||
'split_on_dash = guessit.transfo.split_on_dash:SplitOnDash',
|
||||
'guess_episode_info_from_position = guessit.transfo.guess_episode_info_from_position:GuessEpisodeInfoFromPosition',
|
||||
'guess_movie_title_from_position = guessit.transfo.guess_movie_title_from_position:GuessMovieTitleFromPosition',
|
||||
'guess_episode_special = guessit.transfo.guess_episode_special:GuessEpisodeSpecial']
|
||||
|
||||
def _find_entry_points(self, namespace):
|
||||
entry_points = {}
|
||||
# Internal entry points
|
||||
if namespace == self.namespace:
|
||||
for internal_entry_point_str in self._internal_entry_points:
|
||||
internal_entry_point = EntryPoint.parse(internal_entry_point_str)
|
||||
entry_points[internal_entry_point.name] = internal_entry_point
|
||||
|
||||
# Package entry points
|
||||
setuptools_entrypoints = super(DefaultTransformerExtensionManager, self)._find_entry_points(namespace)
|
||||
for setuptools_entrypoint in setuptools_entrypoints:
|
||||
entry_points[setuptools_entrypoint.name] = setuptools_entrypoint
|
||||
|
||||
return list(entry_points.values())
|
||||
|
||||
_extensions = None
|
||||
|
||||
|
||||
def all_transformers():
|
||||
return _extensions.objects()
|
||||
|
||||
|
||||
def get_transformer(name):
|
||||
return _extensions.object(name)
|
||||
|
||||
|
||||
def add_transformer(name, module_name):
|
||||
_extensions.register_module(name, module_name)
|
||||
|
||||
|
||||
def reload(custom=False):
|
||||
"""
|
||||
Reload extension manager with default or custom one.
|
||||
:param custom: if True, custom manager will be used, else default one.
|
||||
Default manager will load default extensions from guessit and setuptools packaging extensions
|
||||
Custom manager will not load default extensions from guessit, using only setuptools packaging extensions.
|
||||
:type custom: boolean
|
||||
"""
|
||||
global _extensions
|
||||
if custom:
|
||||
_extensions = CustomTransformerExtensionManager()
|
||||
else:
|
||||
_extensions = DefaultTransformerExtensionManager()
|
||||
|
||||
reload()
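A minimal sketch (hypothetical module and property names) of how a third-party transformer would plug into this manager:

# in a hypothetical module mypackage/guess_fansub.py
from guessit.plugins.transformers import Transformer

class GuessFansub(Transformer):
    def __init__(self):
        # higher priority sorts earlier (extensions are ordered by -priority)
        super(GuessFansub, self).__init__(priority=10)

    def supported_properties(self):
        return {'fansubGroup': []}

    def process(self, mtree, options=None):
        pass   # inspect mtree's unidentified leaves and attach guesses here

# then, roughly, register it by name and module path:
# add_transformer('guess_fansub', 'mypackage.guess_fansub')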
|
|
@@ -1,65 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit.plugins.transformers import all_transformers
|
||||
|
||||
|
||||
def best_quality_properties(props, *guesses):
|
||||
"""Retrieve the best quality guess, based on given properties
|
||||
|
||||
:param props: Properties to include in the rating
|
||||
:type props: list of strings
|
||||
:param guesses: Guesses to rate
|
||||
:type guesses: :class:`guessit.guess.Guess`
|
||||
|
||||
:return: Best quality guess from all passed guesses
|
||||
:rtype: :class:`guessit.guess.Guess`
|
||||
"""
|
||||
best_guess = None
|
||||
best_rate = None
|
||||
for guess in guesses:
|
||||
for transformer in all_transformers():
|
||||
rate = transformer.rate_quality(guess, *props)
|
||||
if best_rate is None or best_rate < rate:
|
||||
best_rate = rate
|
||||
best_guess = guess
|
||||
return best_guess
|
||||
|
||||
|
||||
def best_quality(*guesses):
|
||||
"""Retrieve the best quality guess.
|
||||
|
||||
:param guesses: Guesses to rate
|
||||
:type guesses: :class:`guessit.guess.Guess`
|
||||
|
||||
:return: Best quality guess from all passed guesses
|
||||
:rtype: :class:`guessit.guess.Guess`
|
||||
"""
|
||||
best_guess = None
|
||||
best_rate = None
|
||||
for guess in guesses:
|
||||
for transformer in all_transformers():
|
||||
rate = transformer.rate_quality(guess)
|
||||
if best_rate is None or best_rate < rate:
|
||||
best_rate = rate
|
||||
best_guess = guess
|
||||
return best_guess
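A hedged usage sketch (assumes the default transformers are loaded; the guesses are hypothetical):

from guessit.guess import Guess

g1 = Guess({'screenSize': '720p'}, confidence=0.6)
g2 = Guess({'screenSize': '1080p'}, confidence=0.6)

# ask each loaded transformer to rate the 'screenSize' property of both
# guesses and keep the one with the best rating
best = best_quality_properties(['screenSize'], g1, g2)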
|
|
@@ -1,28 +1,28 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
# Smewt - A smart collection manager
|
||||
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# Smewt is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# Smewt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from __future__ import unicode_literals
|
||||
import logging
|
||||
import sys
|
||||
import os
|
||||
import os, os.path
|
||||
|
||||
|
||||
GREEN_FONT = "\x1B[0;32m"
|
||||
YELLOW_FONT = "\x1B[0;33m"
|
||||
|
@@ -31,7 +31,7 @@ RED_FONT = "\x1B[0;31m"
|
|||
RESET_FONT = "\x1B[0m"
|
||||
|
||||
|
||||
def setupLogging(colored=True, with_time=False, with_thread=False, filename=None, with_lineno=False): # pragma: no cover
|
||||
def setupLogging(colored=True, with_time=False, with_thread=False, filename=None, with_lineno=False):
|
||||
"""Set up a nice colored logger as the main application logger."""
|
||||
|
||||
class SimpleFormatter(logging.Formatter):
|
||||
|
|
|
@@ -1,26 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
import logging
|
||||
from guessit.slogging import setupLogging
|
||||
setupLogging()
|
||||
logging.disable(logging.INFO)
|
|
@@ -1,40 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
from guessit.test import (test_api, test_autodetect, test_autodetect_all, test_doctests,
|
||||
test_episode, test_hashes, test_language, test_main,
|
||||
test_matchtree, test_movie, test_quality, test_utils)
|
||||
from unittest import TextTestRunner
|
||||
|
||||
|
||||
import logging
|
||||
|
||||
def main():
|
||||
for suite in [test_api.suite, test_autodetect.suite,
|
||||
test_autodetect_all.suite, test_doctests.suite,
|
||||
test_episode.suite, test_hashes.suite, test_language.suite,
|
||||
test_main.suite, test_matchtree.suite, test_movie.suite,
|
||||
test_quality.suite, test_utils.suite]:
|
||||
TextTestRunner(verbosity=2).run(suite)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
|
@@ -1,289 +0,0 @@
|
|||
? Movies/Fear and Loathing in Las Vegas (1998)/Fear.and.Loathing.in.Las.Vegas.720p.HDDVD.DTS.x264-ESiR.mkv
|
||||
: type: movie
|
||||
title: Fear and Loathing in Las Vegas
|
||||
year: 1998
|
||||
screenSize: 720p
|
||||
format: HD-DVD
|
||||
audioCodec: DTS
|
||||
videoCodec: h264
|
||||
releaseGroup: ESiR
|
||||
|
||||
? Leopard.dmg
|
||||
: type: unknown
|
||||
extension: dmg
|
||||
|
||||
? Series/Duckman/Duckman - 101 (01) - 20021107 - I, Duckman.avi
|
||||
: type: episode
|
||||
series: Duckman
|
||||
season: 1
|
||||
episodeNumber: 1
|
||||
title: I, Duckman
|
||||
date: 2002-11-07
|
||||
|
||||
? Series/Neverwhere/Neverwhere.05.Down.Street.[tvu.org.ru].avi
|
||||
: type: episode
|
||||
series: Neverwhere
|
||||
episodeNumber: 5
|
||||
title: Down Street
|
||||
website: tvu.org.ru
|
||||
|
||||
? Neverwhere.05.Down.Street.[tvu.org.ru].avi
|
||||
: type: episode
|
||||
series: Neverwhere
|
||||
episodeNumber: 5
|
||||
title: Down Street
|
||||
website: tvu.org.ru
|
||||
|
||||
? Series/Breaking Bad/Minisodes/Breaking.Bad.(Minisodes).01.Good.Cop.Bad.Cop.WEBRip.XviD.avi
|
||||
: type: episode
|
||||
series: Breaking Bad
|
||||
episodeFormat: Minisode
|
||||
episodeNumber: 1
|
||||
title: Good Cop Bad Cop
|
||||
format: WEBRip
|
||||
videoCodec: XviD
|
||||
|
||||
? Series/Kaamelott/Kaamelott - Livre V - Ep 23 - Le Forfait.avi
|
||||
: type: episode
|
||||
series: Kaamelott
|
||||
episodeNumber: 23
|
||||
title: Le Forfait
|
||||
|
||||
? Movies/The Doors (1991)/09.03.08.The.Doors.(1991).BDRip.720p.AC3.X264-HiS@SiLUHD-English.[sharethefiles.com].mkv
|
||||
: type: movie
|
||||
title: The Doors
|
||||
year: 1991
|
||||
date: 2008-03-09
|
||||
format: BluRay
|
||||
screenSize: 720p
|
||||
audioCodec: AC3
|
||||
videoCodec: h264
|
||||
releaseGroup: HiS@SiLUHD
|
||||
language: english
|
||||
website: sharethefiles.com
|
||||
|
||||
? Movies/M.A.S.H. (1970)/MASH.(1970).[Divx.5.02][Dual-Subtitulos][DVDRip].ogm
|
||||
: type: movie
|
||||
title: M.A.S.H.
|
||||
year: 1970
|
||||
videoCodec: DivX
|
||||
format: DVD
|
||||
|
||||
? the.mentalist.501.hdtv-lol.mp4
|
||||
: type: episode
|
||||
series: The Mentalist
|
||||
season: 5
|
||||
episodeNumber: 1
|
||||
format: HDTV
|
||||
releaseGroup: LOL
|
||||
|
||||
? the.simpsons.2401.hdtv-lol.mp4
|
||||
: type: episode
|
||||
series: The Simpsons
|
||||
season: 24
|
||||
episodeNumber: 1
|
||||
format: HDTV
|
||||
releaseGroup: LOL
|
||||
|
||||
? Homeland.S02E01.HDTV.x264-EVOLVE.mp4
|
||||
: type: episode
|
||||
series: Homeland
|
||||
season: 2
|
||||
episodeNumber: 1
|
||||
format: HDTV
|
||||
videoCodec: h264
|
||||
releaseGroup: EVOLVE
|
||||
|
||||
? /media/Band_of_Brothers-e01-Currahee.mkv
|
||||
: type: episode
|
||||
series: Band of Brothers
|
||||
episodeNumber: 1
|
||||
title: Currahee
|
||||
|
||||
? /media/Band_of_Brothers-x02-We_Stand_Alone_Together.mkv
|
||||
: type: episode
|
||||
series: Band of Brothers
|
||||
bonusNumber: 2
|
||||
bonusTitle: We Stand Alone Together
|
||||
|
||||
? /movies/James_Bond-f21-Casino_Royale-x02-Stunts.mkv
|
||||
: type: movie
|
||||
title: Casino Royale
|
||||
filmSeries: James Bond
|
||||
filmNumber: 21
|
||||
bonusNumber: 2
|
||||
bonusTitle: Stunts
|
||||
|
||||
? /TV Shows/new.girl.117.hdtv-lol.mp4
|
||||
: type: episode
|
||||
series: New Girl
|
||||
season: 1
|
||||
episodeNumber: 17
|
||||
format: HDTV
|
||||
releaseGroup: LOL
|
||||
|
||||
? The.Office.(US).1x03.Health.Care.HDTV.XviD-LOL.avi
|
||||
: type: episode
|
||||
series: The Office (US)
|
||||
country: US
|
||||
season: 1
|
||||
episodeNumber: 3
|
||||
title: Health Care
|
||||
format: HDTV
|
||||
videoCodec: XviD
|
||||
releaseGroup: LOL
|
||||
|
||||
? The_Insider-(1999)-x02-60_Minutes_Interview-1996.mp4
|
||||
: type: movie
|
||||
title: The Insider
|
||||
year: 1999
|
||||
bonusNumber: 2
|
||||
bonusTitle: 60 Minutes Interview-1996
|
||||
|
||||
? OSS_117--Cairo,_Nest_of_Spies.mkv
|
||||
: type: movie
|
||||
title: OSS 117--Cairo, Nest of Spies
|
||||
|
||||
? Rush.._Beyond_The_Lighted_Stage-x09-Between_Sun_and_Moon-2002_Hartford.mkv
|
||||
: type: movie
|
||||
title: Rush Beyond The Lighted Stage
|
||||
bonusNumber: 9
|
||||
bonusTitle: Between Sun and Moon-2002 Hartford
|
||||
|
||||
? House.Hunters.International.S56E06.720p.hdtv.x264.mp4
|
||||
: type: episode
|
||||
series: House Hunters International
|
||||
season: 56
|
||||
episodeNumber: 6
|
||||
screenSize: 720p
|
||||
format: HDTV
|
||||
videoCodec: h264
|
||||
|
||||
? White.House.Down.2013.1080p.BluRay.DTS-HD.MA.5.1.x264-PublicHD.mkv
|
||||
: type: movie
|
||||
title: White House Down
|
||||
year: 2013
|
||||
screenSize: 1080p
|
||||
format: BluRay
|
||||
audioCodec: DTS
|
||||
audioProfile: HDMA
|
||||
videoCodec: h264
|
||||
releaseGroup: PublicHD
|
||||
audioChannels: "5.1"
|
||||
|
||||
? Hostages.S01E01.Pilot.for.Air.720p.WEB-DL.DD5.1.H.264-NTb.nfo
|
||||
: type: episodeinfo
|
||||
series: Hostages
|
||||
title: Pilot for Air
|
||||
season: 1
|
||||
episodeNumber: 1
|
||||
screenSize: 720p
|
||||
format: WEB-DL
|
||||
audioChannels: "5.1"
|
||||
videoCodec: h264
|
||||
audioCodec: DolbyDigital
|
||||
releaseGroup: NTb
|
||||
|
||||
? Despicable.Me.2.2013.1080p.BluRay.x264-VeDeTT.nfo
|
||||
: type: movieinfo
|
||||
title: Despicable Me 2
|
||||
year: 2013
|
||||
screenSize: 1080p
|
||||
format: BluRay
|
||||
videoCodec: h264
|
||||
releaseGroup: VeDeTT
|
||||
|
||||
? Le Cinquieme Commando 1971 SUBFORCED FRENCH DVDRiP XViD AC3 Bandix.mkv
|
||||
: type: movie
|
||||
audioCodec: AC3
|
||||
format: DVD
|
||||
releaseGroup: Bandix
|
||||
subtitleLanguage: French
|
||||
title: Le Cinquieme Commando
|
||||
videoCodec: XviD
|
||||
year: 1971
|
||||
|
||||
? Le Seigneur des Anneaux - La Communauté de l'Anneau - Version Longue - BDRip.mkv
|
||||
: type: movie
|
||||
format: BluRay
|
||||
title: Le Seigneur des Anneaux
|
||||
|
||||
? La petite bande (Michel Deville - 1983) VF PAL MP4 x264 AAC.mkv
|
||||
: type: movie
|
||||
audioCodec: AAC
|
||||
language: French
|
||||
title: La petite bande
|
||||
videoCodec: h264
|
||||
year: 1983
|
||||
|
||||
? Retour de Flammes (Gregor Schnitzler 2003) FULL DVD.iso
|
||||
: type: movie
|
||||
format: DVD
|
||||
title: Retour de Flammes
|
||||
year: 2003
|
||||
|
||||
? A.Common.Title.Special.2014.avi
|
||||
: type: movie
|
||||
year: 2014
|
||||
title: A Common Title Special
|
||||
|
||||
? A.Common.Title.2014.Special.avi
|
||||
: type: episode
|
||||
year: 2014
|
||||
series: A Common Title
|
||||
title: Special
|
||||
special: Special
|
||||
|
||||
? A.Common.Title.2014.Special.Edition.avi
|
||||
: type: movie
|
||||
year: 2014
|
||||
title: A Common Title
|
||||
edition: Special Edition
|
||||
|
||||
? Downton.Abbey.2013.Christmas.Special.HDTV.x264-FoV.mp4
|
||||
: type: episode
|
||||
year: 2013
|
||||
series: Downton Abbey
|
||||
title: Christmas Special
|
||||
videoCodec: h264
|
||||
releaseGroup: FoV
|
||||
format: HDTV
|
||||
special: Special
|
||||
|
||||
? Doctor_Who_2013_Christmas_Special.The_Time_of_The_Doctor.HD
|
||||
: options: -n
|
||||
type: episode
|
||||
series: Doctor Who
|
||||
other: HD
|
||||
special: Special
|
||||
title: Christmas Special The Time of The Doctor
|
||||
year: 2013
|
||||
|
||||
? Doctor Who 2005 50th Anniversary Special The Day of the Doctor 3.avi
|
||||
: type: episode
|
||||
series: Doctor Who
|
||||
special: Special
|
||||
title: 50th Anniversary Special The Day of the Doctor 3
|
||||
year: 2005
|
||||
|
||||
? Robot Chicken S06-Born Again Virgin Christmas Special HDTV x264.avi
|
||||
: type: episode
|
||||
series: Robot Chicken
|
||||
format: HDTV
|
||||
season: 6
|
||||
title: Born Again Virgin Christmas Special
|
||||
videoCodec: h264
|
||||
special: Special
|
||||
|
||||
? Wicked.Tuna.S03E00.Head.To.Tail.Special.HDTV.x264-YesTV
|
||||
: options: -n
|
||||
type: episode
|
||||
series: Wicked Tuna
|
||||
title: Head To Tail Special
|
||||
releaseGroup: YesTV
|
||||
season: 3
|
||||
episodeNumber: 0
|
||||
videoCodec: h264
|
||||
format: HDTV
|
||||
special: Special
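Each entry above is a standard YAML complex-key mapping: the line starting with `?` is the key (a release file name) and the block starting with `:` is the value (the properties the guesser is expected to return), so the whole fixture file loads as one dict keyed by file name. A small illustrative sketch of how a single entry parses, assuming PyYAML; the test harness that actually consumes these files appears further down in this diff:

import yaml

# One entry in the same "? key / : value" style used by the fixture files.
fixture = """
? the.mentalist.501.hdtv-lol.mp4
: type: episode
  series: The Mentalist
  season: 5
  episodeNumber: 1
  format: HDTV
  releaseGroup: LOL
"""

ground_truth = yaml.safe_load(fixture)
for filename, expected in ground_truth.items():
    print(filename)             # the.mentalist.501.hdtv-lol.mp4
    print(expected['season'])   # 5 (plain YAML scalars load as ints/strings)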
@@ -1 +0,0 @@
Just a dummy srt file (used for unittests: do not remove!)
|
|
@@ -1,569 +0,0 @@
# Dubious tests
|
||||
#
|
||||
#? "finale "
|
||||
#: releaseGroup: FiNaLe
|
||||
# extension: ""
|
||||
|
||||
|
||||
? Series/Californication/Season 2/Californication.2x05.Vaginatown.HDTV.XviD-0TV.avi
|
||||
: series: Californication
|
||||
season: 2
|
||||
episodeNumber: 5
|
||||
title: Vaginatown
|
||||
format: HDTV
|
||||
videoCodec: XviD
|
||||
releaseGroup: 0TV
|
||||
|
||||
? Series/dexter/Dexter.5x02.Hello,.Bandit.ENG.-.sub.FR.HDTV.XviD-AlFleNi-TeaM.[tvu.org.ru].avi
|
||||
: series: Dexter
|
||||
season: 5
|
||||
episodeNumber: 2
|
||||
title: Hello, Bandit
|
||||
language: English
|
||||
subtitleLanguage: French
|
||||
format: HDTV
|
||||
videoCodec: XviD
|
||||
releaseGroup: AlFleNi-TeaM
|
||||
website: tvu.org.ru
|
||||
|
||||
? Series/Treme/Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.avi
|
||||
: series: Treme
|
||||
season: 1
|
||||
episodeNumber: 3
|
||||
title: Right Place, Wrong Time
|
||||
format: HDTV
|
||||
videoCodec: XviD
|
||||
releaseGroup: NoTV
|
||||
|
||||
? Series/Duckman/Duckman - 101 (01) - 20021107 - I, Duckman.avi
|
||||
: series: Duckman
|
||||
season: 1
|
||||
episodeNumber: 1
|
||||
title: I, Duckman
|
||||
date: 2002-11-07
|
||||
|
||||
? Series/Duckman/Duckman - S1E13 Joking The Chicken (unedited).avi
|
||||
: series: Duckman
|
||||
season: 1
|
||||
episodeNumber: 13
|
||||
title: Joking The Chicken
|
||||
|
||||
? Series/Simpsons/Saison 12 Français/Simpsons,.The.12x08.A.Bas.Le.Sergent.Skinner.FR.avi
|
||||
: series: The Simpsons
|
||||
season: 12
|
||||
episodeNumber: 8
|
||||
title: A Bas Le Sergent Skinner
|
||||
language: French
|
||||
|
||||
? Series/Futurama/Season 3 (mkv)/[™] Futurama - S03E22 - Le chef de fer à 30% ( 30 Percent Iron Chef ).mkv
|
||||
: series: Futurama
|
||||
season: 3
|
||||
episodeNumber: 22
|
||||
title: Le chef de fer à 30%
|
||||
|
||||
? Series/The Office/Season 6/The Office - S06xE01.avi
|
||||
: series: The Office
|
||||
season: 6
|
||||
episodeNumber: 1
|
||||
|
||||
? series/The Office/Season 4/The Office [401] Fun Run.avi
|
||||
: series: The Office
|
||||
season: 4
|
||||
episodeNumber: 1
|
||||
title: Fun Run
|
||||
|
||||
? Series/Mad Men Season 1 Complete/Mad.Men.S01E01.avi
|
||||
: series: Mad Men
|
||||
season: 1
|
||||
episodeNumber: 1
|
||||
other: complete
|
||||
|
||||
? series/Psych/Psych S02 Season 2 Complete English DVD/Psych.S02E02.65.Million.Years.Off.avi
|
||||
: series: Psych
|
||||
season: 2
|
||||
episodeNumber: 2
|
||||
title: 65 Million Years Off
|
||||
language: english
|
||||
format: DVD
|
||||
other: complete
|
||||
|
||||
? series/Psych/Psych S02 Season 2 Complete English DVD/Psych.S02E03.Psy.Vs.Psy.Français.srt
|
||||
: series: Psych
|
||||
season: 2
|
||||
episodeNumber: 3
|
||||
title: Psy Vs Psy
|
||||
format: DVD
|
||||
language: English
|
||||
subtitleLanguage: French
|
||||
other: complete
|
||||
|
||||
? Series/Pure Laine/Pure.Laine.1x01.Toutes.Couleurs.Unies.FR.(Québec).DVB-Kceb.[tvu.org.ru].avi
|
||||
: series: Pure Laine
|
||||
season: 1
|
||||
episodeNumber: 1
|
||||
title: Toutes Couleurs Unies
|
||||
format: DVB
|
||||
releaseGroup: Kceb
|
||||
language: french
|
||||
website: tvu.org.ru
|
||||
|
||||
? Series/Pure Laine/2x05 - Pure Laine - Je Me Souviens.avi
|
||||
: series: Pure Laine
|
||||
season: 2
|
||||
episodeNumber: 5
|
||||
title: Je Me Souviens
|
||||
|
||||
? Series/Tout sur moi/Tout sur moi - S02E02 - Ménage à trois (14-01-2008) [Rip by Ampli].avi
|
||||
: series: Tout sur moi
|
||||
season: 2
|
||||
episodeNumber: 2
|
||||
title: Ménage à trois
|
||||
date: 2008-01-14
|
||||
|
||||
? The.Mentalist.2x21.18-5-4.ENG.-.sub.FR.HDTV.XviD-AlFleNi-TeaM.[tvu.org.ru].avi
|
||||
: series: The Mentalist
|
||||
season: 2
|
||||
episodeNumber: 21
|
||||
title: 18-5-4
|
||||
language: english
|
||||
subtitleLanguage: french
|
||||
format: HDTV
|
||||
videoCodec: Xvid
|
||||
releaseGroup: AlFleNi-TeaM
|
||||
website: tvu.org.ru
|
||||
|
||||
? series/__ Incomplete __/Dr Slump (Catalan)/Dr._Slump_-_003_DVB-Rip_Catalan_by_kelf.avi
|
||||
: series: Dr Slump
|
||||
episodeNumber: 3
|
||||
format: DVB
|
||||
language: catalan
|
||||
|
||||
? series/Ren and Stimpy - Black_hole_[DivX].avi
|
||||
: series: Ren and Stimpy
|
||||
title: Black hole
|
||||
videoCodec: DivX
|
||||
|
||||
? Series/Walt Disney/Donald.Duck.-.Good.Scouts.[www.bigernie.jump.to].avi
|
||||
: series: Donald Duck
|
||||
title: Good Scouts
|
||||
website: www.bigernie.jump.to
|
||||
|
||||
? Series/Neverwhere/Neverwhere.05.Down.Street.[tvu.org.ru].avi
|
||||
: series: Neverwhere
|
||||
episodeNumber: 5
|
||||
title: Down Street
|
||||
website: tvu.org.ru
|
||||
|
||||
? Series/South Park/Season 4/South.Park.4x07.Cherokee.Hair.Tampons.DVDRip.[tvu.org.ru].avi
|
||||
: series: South Park
|
||||
season: 4
|
||||
episodeNumber: 7
|
||||
title: Cherokee Hair Tampons
|
||||
format: DVD
|
||||
website: tvu.org.ru
|
||||
|
||||
? Series/Kaamelott/Kaamelott - Livre V - Ep 23 - Le Forfait.avi
|
||||
: series: Kaamelott
|
||||
episodeNumber: 23
|
||||
title: Le Forfait
|
||||
|
||||
? Series/Duckman/Duckman - 110 (10) - 20021218 - Cellar Beware.avi
|
||||
: series: Duckman
|
||||
season: 1
|
||||
episodeNumber: 10
|
||||
date: 2002-12-18
|
||||
title: Cellar Beware
|
||||
|
||||
? Series/Ren & Stimpy/Ren And Stimpy - Onward & Upward-Adult Party Cartoon.avi
|
||||
: series: Ren And Stimpy
|
||||
title: Onward & Upward-Adult Party Cartoon
|
||||
|
||||
? Series/Breaking Bad/Minisodes/Breaking.Bad.(Minisodes).01.Good.Cop.Bad.Cop.WEBRip.XviD.avi
|
||||
: series: Breaking Bad
|
||||
episodeFormat: Minisode
|
||||
episodeNumber: 1
|
||||
title: Good Cop Bad Cop
|
||||
format: WEBRip
|
||||
videoCodec: XviD
|
||||
|
||||
? Series/My Name Is Earl/My.Name.Is.Earl.S01Extras.-.Bad.Karma.DVDRip.XviD.avi
|
||||
: series: My Name Is Earl
|
||||
season: 1
|
||||
title: Bad Karma
|
||||
format: DVD
|
||||
special: Extras
|
||||
videoCodec: XviD
|
||||
|
||||
? /mnt/series/The Big Bang Theory/S01/The.Big.Bang.Theory.S01E01.mkv
|
||||
: series: The Big Bang Theory
|
||||
season: 1
|
||||
episodeNumber: 1
|
||||
|
||||
? /media/Parks_and_Recreation-s03-e01.mkv
|
||||
: series: Parks and Recreation
|
||||
season: 3
|
||||
episodeNumber: 1
|
||||
|
||||
? /media/Parks_and_Recreation-s03-e02-Flu_Season.mkv
|
||||
: series: Parks and Recreation
|
||||
season: 3
|
||||
title: Flu Season
|
||||
episodeNumber: 2
|
||||
|
||||
? /media/Parks_and_Recreation-s03-x01.mkv
|
||||
: series: Parks and Recreation
|
||||
season: 3
|
||||
bonusNumber: 1
|
||||
|
||||
? /media/Parks_and_Recreation-s03-x02-Gag_Reel.mkv
|
||||
: series: Parks and Recreation
|
||||
season: 3
|
||||
bonusNumber: 2
|
||||
bonusTitle: Gag Reel
|
||||
|
||||
? /media/Band_of_Brothers-e01-Currahee.mkv
|
||||
: series: Band of Brothers
|
||||
episodeNumber: 1
|
||||
title: Currahee
|
||||
|
||||
? /media/Band_of_Brothers-x02-We_Stand_Alone_Together.mkv
|
||||
: series: Band of Brothers
|
||||
bonusNumber: 2
|
||||
bonusTitle: We Stand Alone Together
|
||||
|
||||
? /TV Shows/Mad.M-5x9.mkv
|
||||
: series: Mad M
|
||||
season: 5
|
||||
episodeNumber: 9
|
||||
|
||||
? /TV Shows/new.girl.117.hdtv-lol.mp4
|
||||
: series: New Girl
|
||||
season: 1
|
||||
episodeNumber: 17
|
||||
format: HDTV
|
||||
releaseGroup: LOL
|
||||
|
||||
? Kaamelott - 5x44x45x46x47x48x49x50.avi
|
||||
: series: Kaamelott
|
||||
season: 5
|
||||
episodeNumber: 44
|
||||
episodeList: [44, 45, 46, 47, 48, 49, 50]
|
||||
|
||||
? Example S01E01-02.avi
|
||||
: series: Example
|
||||
season: 1
|
||||
episodeNumber: 1
|
||||
episodeList: [1, 2]
|
||||
|
||||
? Example S01E01E02.avi
|
||||
: series: Example
|
||||
season: 1
|
||||
episodeNumber: 1
|
||||
episodeList: [1, 2]
|
||||
|
||||
? Series/Baccano!/Baccano!_-_T1_-_Trailer_-_[Ayu](dae8173e).mkv
|
||||
: series: Baccano!
|
||||
other: Trailer
|
||||
|
||||
? Series/Doctor Who (2005)/Season 06/Doctor Who (2005) - S06E01 - The Impossible Astronaut (1).avi
|
||||
: series: Doctor Who
|
||||
year: 2005
|
||||
season: 6
|
||||
episodeNumber: 1
|
||||
title: The Impossible Astronaut
|
||||
|
||||
? The.Office.(US).1x03.Health.Care.HDTV.XviD-LOL.avi
|
||||
: series: The Office (US)
|
||||
country: US
|
||||
season: 1
|
||||
episodeNumber: 3
|
||||
title: Health Care
|
||||
format: HDTV
|
||||
videoCodec: XviD
|
||||
releaseGroup: LOL
|
||||
|
||||
? /Volumes/data-1/Series/Futurama/Season 3/Futurama_-_S03_DVD_Bonus_-_Deleted_Scenes_Part_3.ogm
|
||||
: series: Futurama
|
||||
season: 3
|
||||
other: Bonus
|
||||
title: Deleted Scenes Part 3
|
||||
format: DVD
|
||||
|
||||
? Ben.and.Kate.S01E02.720p.HDTV.X264-DIMENSION.mkv
|
||||
: series: Ben and Kate
|
||||
season: 1
|
||||
episodeNumber: 2
|
||||
screenSize: 720p
|
||||
format: HDTV
|
||||
videoCodec: h264
|
||||
releaseGroup: DIMENSION
|
||||
|
||||
? /volume1/TV Series/Drawn Together/Season 1/Drawn Together 1x04 Requiem for a Reality Show.avi
|
||||
: series: Drawn Together
|
||||
season: 1
|
||||
episodeNumber: 4
|
||||
title: Requiem for a Reality Show
|
||||
|
||||
? Sons.of.Anarchy.S05E06.720p.WEB.DL.DD5.1.H.264-CtrlHD.mkv
|
||||
: series: Sons of Anarchy
|
||||
season: 5
|
||||
episodeNumber: 6
|
||||
screenSize: 720p
|
||||
format: WEB-DL
|
||||
audioChannels: "5.1"
|
||||
audioCodec: DolbyDigital
|
||||
videoCodec: h264
|
||||
releaseGroup: CtrlHD
|
||||
|
||||
? /media/bdc64bfe-e36f-4af8-b550-e6fd2dfaa507/TV_Shows/Doctor Who (2005)/Saison 6/Doctor Who (2005) - S06E13 - The Wedding of River Song.mkv
|
||||
: series: Doctor Who
|
||||
season: 6
|
||||
episodeNumber: 13
|
||||
year: 2005
|
||||
title: The Wedding of River Song
|
||||
idNumber: bdc64bfe-e36f-4af8-b550-e6fd2dfaa507
|
||||
|
||||
? /mnt/videos/tvshows/Doctor Who/Season 06/E13 - The Wedding of River Song.mkv
|
||||
: series: Doctor Who
|
||||
season: 6
|
||||
episodeNumber: 13
|
||||
title: The Wedding of River Song
|
||||
|
||||
? The.Simpsons.S24E03.Adventures.in.Baby-Getting.720p.WEB-DL.DD5.1.H.264-CtrlHD.mkv
|
||||
: series: The Simpsons
|
||||
season: 24
|
||||
episodeNumber: 3
|
||||
title: Adventures in Baby-Getting
|
||||
screenSize: 720p
|
||||
format: WEB-DL
|
||||
audioChannels: "5.1"
|
||||
audioCodec: DolbyDigital
|
||||
videoCodec: h264
|
||||
releaseGroup: CtrlHD
|
||||
|
||||
? /home/disaster/Videos/TV/Merlin/merlin_2008.5x02.arthurs_bane_part_two.repack.720p_hdtv_x264-fov.mkv
|
||||
: series: Merlin
|
||||
season: 5
|
||||
episodeNumber: 2
|
||||
title: Arthurs bane part two
|
||||
screenSize: 720p
|
||||
format: HDTV
|
||||
videoCodec: h264
|
||||
releaseGroup: Fov
|
||||
year: 2008
|
||||
other: Proper
|
||||
|
||||
? "Da Vinci's Demons - 1x04 - The Magician.mkv"
|
||||
: series: "Da Vinci's Demons"
|
||||
season: 1
|
||||
episodeNumber: 4
|
||||
title: The Magician
|
||||
|
||||
? CSI.S013E18.Sheltered.720p.WEB-DL.DD5.1.H.264.mkv
|
||||
: series: CSI
|
||||
season: 13
|
||||
episodeNumber: 18
|
||||
title: Sheltered
|
||||
screenSize: 720p
|
||||
format: WEB-DL
|
||||
audioChannels: "5.1"
|
||||
audioCodec: DolbyDigital
|
||||
videoCodec: h264
|
||||
|
||||
? Game of Thrones S03E06 1080i HDTV DD5.1 MPEG2-TrollHD.ts
|
||||
: series: Game of Thrones
|
||||
season: 3
|
||||
episodeNumber: 6
|
||||
screenSize: 1080i
|
||||
format: HDTV
|
||||
audioChannels: "5.1"
|
||||
audioCodec: DolbyDigital
|
||||
videoCodec: MPEG2
|
||||
releaseGroup: TrollHD
|
||||
|
||||
? gossip.girl.s01e18.hdtv.xvid-2hd.eng.srt
|
||||
: series: gossip girl
|
||||
season: 1
|
||||
episodeNumber: 18
|
||||
format: HDTV
|
||||
videoCodec: XviD
|
||||
releaseGroup: 2HD
|
||||
subtitleLanguage: english
|
||||
|
||||
? Wheels.S03E01E02.720p.HDTV.x264-IMMERSE.mkv
|
||||
: series: Wheels
|
||||
season: 3
|
||||
episodeNumber: 1
|
||||
episodeList: [1, 2]
|
||||
screenSize: 720p
|
||||
format: HDTV
|
||||
videoCodec: h264
|
||||
releaseGroup: IMMERSE
|
||||
|
||||
? Wheels.S03E01-02.720p.HDTV.x264-IMMERSE.mkv
|
||||
: series: Wheels
|
||||
season: 3
|
||||
episodeNumber: 1
|
||||
episodeList: [1, 2]
|
||||
screenSize: 720p
|
||||
format: HDTV
|
||||
videoCodec: h264
|
||||
releaseGroup: IMMERSE
|
||||
|
||||
? Wheels.S03E01-E02.720p.HDTV.x264-IMMERSE.mkv
|
||||
: series: Wheels
|
||||
season: 3
|
||||
episodeNumber: 1
|
||||
episodeList: [1, 2]
|
||||
screenSize: 720p
|
||||
format: HDTV
|
||||
videoCodec: h264
|
||||
releaseGroup: IMMERSE
|
||||
|
||||
? Wheels.S03E01-03.720p.HDTV.x264-IMMERSE.mkv
|
||||
: series: Wheels
|
||||
season: 3
|
||||
episodeNumber: 1
|
||||
episodeList: [1, 2, 3]
|
||||
screenSize: 720p
|
||||
format: HDTV
|
||||
videoCodec: h264
|
||||
releaseGroup: IMMERSE
|
||||
|
||||
? Marvels.Agents.of.S.H.I.E.L.D.S01E06.720p.HDTV.X264-DIMENSION.mkv
|
||||
: series: Marvels Agents of S.H.I.E.L.D.
|
||||
season: 1
|
||||
episodeNumber: 6
|
||||
screenSize: 720p
|
||||
format: HDTV
|
||||
videoCodec: h264
|
||||
releaseGroup: DIMENSION
|
||||
|
||||
? Marvels.Agents.of.S.H.I.E.L.D..S01E06.720p.HDTV.X264-DIMENSION.mkv
|
||||
: series: Marvels Agents of S.H.I.E.L.D.
|
||||
season: 1
|
||||
episodeNumber: 6
|
||||
screenSize: 720p
|
||||
format: HDTV
|
||||
videoCodec: h264
|
||||
releaseGroup: DIMENSION
|
||||
|
||||
? Series/Friday Night Lights/Season 1/Friday Night Lights S01E19 - Ch-Ch-Ch-Ch-Changes.avi
|
||||
: series: Friday Night Lights
|
||||
season: 1
|
||||
episodeNumber: 19
|
||||
title: Ch-Ch-Ch-Ch-Changes
|
||||
|
||||
? Dexter Saison VII FRENCH.BDRip.XviD-MiND.nfo
|
||||
: series: Dexter
|
||||
season: 7
|
||||
videoCodec: XviD
|
||||
language: French
|
||||
format: BluRay
|
||||
releaseGroup: MiND
|
||||
|
||||
? Dexter Saison sept FRENCH.BDRip.XviD-MiND.nfo
|
||||
: series: Dexter
|
||||
season: 7
|
||||
videoCodec: XviD
|
||||
language: French
|
||||
format: BluRay
|
||||
releaseGroup: MiND
|
||||
|
||||
? "Pokémon S16 - E29 - 1280*720 HDTV VF.mkv"
|
||||
: series: Pokémon
|
||||
format: HDTV
|
||||
language: French
|
||||
season: 16
|
||||
episodeNumber: 29
|
||||
screenSize: 720p
|
||||
|
||||
? One.Piece.E576.VOSTFR.720p.HDTV.x264-MARINE-FORD.mkv
|
||||
: episodeNumber: 576
|
||||
videoCodec: h264
|
||||
format: HDTV
|
||||
series: One Piece
|
||||
releaseGroup: MARINE-FORD
|
||||
subtitleLanguage: French
|
||||
screenSize: 720p
|
||||
|
||||
? Dexter.S08E12.FINAL.MULTi.1080p.BluRay.x264-MiND.mkv
|
||||
: videoCodec: h264
|
||||
episodeNumber: 12
|
||||
season: 8
|
||||
format: BluRay
|
||||
series: Dexter
|
||||
other: final
|
||||
language: Multiple languages
|
||||
releaseGroup: MiND
|
||||
screenSize: 1080p
|
||||
|
||||
? One Piece - E623 VOSTFR HD [www.manga-ddl-free.com].mkv
|
||||
: website: www.manga-ddl-free.com
|
||||
episodeNumber: 623
|
||||
subtitleLanguage: French
|
||||
series: One Piece
|
||||
other: HD
|
||||
|
||||
? Falling Skies Saison 1.HDLight.720p.x264.VFF.mkv
|
||||
: language: French
|
||||
screenSize: 720p
|
||||
season: 1
|
||||
series: Falling Skies
|
||||
videoCodec: h264
|
||||
|
||||
? Sleepy.Hollow.S01E09.720p.WEB-DL.DD5.1.H.264-BP.mkv
|
||||
: episodeNumber: 9
|
||||
videoCodec: h264
|
||||
format: WEB-DL
|
||||
series: Sleepy Hollow
|
||||
audioChannels: "5.1"
|
||||
screenSize: 720p
|
||||
season: 1
|
||||
videoProfile: BP
|
||||
audioCodec: DolbyDigital
|
||||
|
||||
? Sleepy.Hollow.S01E09.720p.WEB-DL.DD5.1.H.264-BS.mkv
|
||||
: episodeNumber: 9
|
||||
videoCodec: h264
|
||||
format: WEB-DL
|
||||
series: Sleepy Hollow
|
||||
audioChannels: "5.1"
|
||||
screenSize: 720p
|
||||
season: 1
|
||||
releaseGroup: BS
|
||||
audioCodec: DolbyDigital
|
||||
|
||||
? Battlestar.Galactica.S00.Pilot.FRENCH.DVDRip.XviD-NOTAG.avi
|
||||
: series: Battlestar Galactica
|
||||
season: 0
|
||||
title: Pilot
|
||||
special: Pilot
|
||||
language: French
|
||||
format: DVD
|
||||
videoCodec: XviD
|
||||
releaseGroup: NOTAG
|
||||
|
||||
? The Big Bang Theory S00E00 Unaired Pilot VOSTFR TVRip XviD-VioCs
|
||||
: options: -n
|
||||
series: The Big Bang Theory
|
||||
season: 0
|
||||
episodeNumber: 0
|
||||
subtitleLanguage: French
|
||||
format: TV
|
||||
videoCodec: XviD
|
||||
releaseGroup: VioCs
|
||||
special: [Unaired, Pilot]
|
||||
title: Unaired Pilot
|
||||
|
||||
? The Big Bang Theory S01E00 PROPER Unaired Pilot TVRip XviD-GIGGITY
|
||||
: options: -n
|
||||
series: The Big Bang Theory
|
||||
season: 1
|
||||
episodeNumber: 0
|
||||
format: TV
|
||||
videoCodec: XviD
|
||||
releaseGroup: GIGGITY
|
||||
other: proper
|
||||
special: [Unaired, Pilot]
|
||||
title: Unaired Pilot
|
|
@@ -1,168 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit import base_text_type, u

from unittest import TestCase, TestLoader, TextTestRunner
import shlex

import yaml, logging, sys, os
from os.path import *


def currentPath():
    '''Returns the path in which the calling file is located.'''
    return dirname(join(os.getcwd(), sys._getframe(1).f_globals['__file__']))


def addImportPath(path):
    '''Function that adds the specified path to the import path. The path can be
    absolute or relative to the calling file.'''
    importPath = abspath(join(currentPath(), path))
    sys.path = [importPath] + sys.path


log = logging.getLogger(__name__)

from guessit.plugins import transformers
import guessit
from guessit.options import option_parser
from guessit import *
from guessit.matcher import *
from guessit.fileutils import *


def allTests(testClass):
    return TestLoader().loadTestsFromTestCase(testClass)


class TestGuessit(TestCase):

    def checkMinimumFieldsCorrect(self, filename, filetype=None, remove_type=True,
                                  exclude_files=None):
        groundTruth = yaml.load(load_file_in_same_dir(__file__, filename))

        def guess_func(string, options=None):
            return guess_file_info(string, options=options, type=filetype)

        return self.checkFields(groundTruth, guess_func, remove_type, exclude_files)

    def checkFields(self, groundTruth, guess_func, remove_type=True,
                    exclude_files=None):
        total = 0
        exclude_files = exclude_files or []

        fails = {}
        additionals = {}

        for filename, required_fields in groundTruth.items():
            filename = u(filename)
            if filename in exclude_files:
                continue

            log.debug('\n' + '-' * 120)
            log.info('Guessing information for file: %s' % filename)

            options = required_fields.pop('options') if 'options' in required_fields else None

            if options:
                args = shlex.split(options)
                options, _ = option_parser.parse_args(args)
                options = vars(options)
            found = guess_func(filename, options)

            total = total + 1

            # no need for these in the unittests
            if remove_type:
                try:
                    del found['type']
                except:
                    pass
            for prop in ('container', 'mimetype'):
                if prop in found:
                    del found[prop]

            # props which are list of just 1 elem should be opened for easier writing of the tests
            for prop in ('language', 'subtitleLanguage', 'other', 'special'):
                value = found.get(prop, None)
                if isinstance(value, list) and len(value) == 1:
                    found[prop] = value[0]

            # look for missing properties
            for prop, value in required_fields.items():
                if prop not in found:
                    log.debug("Prop '%s' not found in: %s" % (prop, filename))
                    if not filename in fails:
                        fails[filename] = []
                    fails[filename].append("'%s' not found in: %s" % (prop, filename))
                    continue

                # if both properties are strings, do a case-insensitive comparison
                if (isinstance(value, base_text_type) and
                    isinstance(found[prop], base_text_type)):
                    if value.lower() != found[prop].lower():
                        log.debug("Wrong prop value [str] for '%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
                        if not filename in fails:
                            fails[filename] = []
                        fails[filename].append("'%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))

                # if both are lists, we assume list of strings and do a case-insensitive
                # comparison on their elements
                elif isinstance(value, list) and isinstance(found[prop], list):
                    s1 = set(u(s).lower() for s in value)
                    s2 = set(u(s).lower() for s in found[prop])
                    if s1 != s2:
                        log.debug("Wrong prop value [list] for '%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
                        if not filename in fails:
                            fails[filename] = []
                        fails[filename].append("'%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
                # otherwise, just compare their values directly
                else:
                    if found[prop] != value:
                        log.debug("Wrong prop value for '%s': expected = '%s' [%s] - received = '%s' [%s]" % (prop, u(value), type(value), u(found[prop]), type(found[prop])))
                        if not filename in fails:
                            fails[filename] = []
                        fails[filename].append("'%s': expected = '%s' [%s] - received = '%s' [%s]" % (prop, u(value), type(value), u(found[prop]), type(found[prop])))

            # look for additional properties
            for prop, value in found.items():
                if prop not in required_fields:
                    log.debug("Found additional info for prop = '%s': '%s'" % (prop, u(value)))
                    if not filename in additionals:
                        additionals[filename] = []
                    additionals[filename].append("'%s': '%s'" % (prop, u(value)))

        correct = total - len(fails)
        log.info('SUMMARY: Guessed correctly %d out of %d filenames' % (correct, total))

        for failed_entry, failed_properties in fails.items():
            log.error('---- ' + failed_entry + ' ----')
            for failed_property in failed_properties:
                log.error("FAILED: " + failed_property)

        for additional_entry, additional_properties in additionals.items():
            log.warn('---- ' + additional_entry + ' ----')
            for additional_property in additional_properties:
                log.warn("ADDITIONAL: " + additional_property)

        self.assertTrue(correct == total,
                        msg='Correct: %d < Total: %d' % (correct, total))
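The fixture files and the TestGuessit harness above meet in small per-category test modules (the truncated file at the end of this diff is one of them). A hypothetical sketch of such a module; the TestMovie class name and the movies.yaml fixture path are illustrative, while checkMinimumFieldsCorrect, allTests and the star import come from the guessittest.py removed above:

from guessit.test.guessittest import *  # TestGuessit, allTests, TextTestRunner, ...


class TestMovie(TestGuessit):

    def testMovies(self):
        # movies.yaml sits next to the test module and uses the
        # "? filename / : expected fields" format shown earlier in this diff.
        self.checkMinimumFieldsCorrect(filename='movies.yaml', filetype='movie')


suite = allTests(TestMovie)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)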
@@ -1,626 +0,0 @@
|
||||
? Movies/Fear and Loathing in Las Vegas (1998)/Fear.and.Loathing.in.Las.Vegas.720p.HDDVD.DTS.x264-ESiR.mkv
|
||||
: title: Fear and Loathing in Las Vegas
|
||||
year: 1998
|
||||
screenSize: 720p
|
||||
format: HD-DVD
|
||||
audioCodec: DTS
|
||||
videoCodec: h264
|
||||
releaseGroup: ESiR
|
||||
|
||||
? Movies/El Dia de la Bestia (1995)/El.dia.de.la.bestia.DVDrip.Spanish.DivX.by.Artik[SEDG].avi
|
||||
: title: El Dia de la Bestia
|
||||
year: 1995
|
||||
format: DVD
|
||||
language: spanish
|
||||
videoCodec: DivX
|
||||
|
||||
? Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv
|
||||
: title: Dark City
|
||||
year: 1998
|
||||
format: BluRay
|
||||
screenSize: 720p
|
||||
audioCodec: DTS
|
||||
videoCodec: h264
|
||||
releaseGroup: CHD
|
||||
|
||||
? Movies/Sin City (BluRay) (2005)/Sin.City.2005.BDRip.720p.x264.AC3-SEPTiC.mkv
|
||||
: title: Sin City
|
||||
year: 2005
|
||||
format: BluRay
|
||||
screenSize: 720p
|
||||
videoCodec: h264
|
||||
audioCodec: AC3
|
||||
releaseGroup: SEPTiC
|
||||
|
||||
|
||||
? Movies/Borat (2006)/Borat.(2006).R5.PROPER.REPACK.DVDRip.XviD-PUKKA.avi
|
||||
: title: Borat
|
||||
year: 2006
|
||||
format: DVD
|
||||
other: [ R5, Proper ]
|
||||
videoCodec: XviD
|
||||
releaseGroup: PUKKA
|
||||
|
||||
|
||||
? "[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv"
|
||||
: title: Le Prestige
|
||||
format: DVD
|
||||
videoCodec: h264
|
||||
videoProfile: HP
|
||||
audioCodec: AAC
|
||||
audioProfile: HE
|
||||
language: [ french, english ]
|
||||
subtitleLanguage: [ french, english ]
|
||||
|
||||
? Battle Royale (2000)/Battle.Royale.(Batoru.Rowaiaru).(2000).(Special.Edition).CD1of2.DVDRiP.XviD-[ZeaL].avi
|
||||
: title: Battle Royale
|
||||
year: 2000
|
||||
edition: special edition
|
||||
cdNumber: 1
|
||||
cdNumberTotal: 2
|
||||
format: DVD
|
||||
videoCodec: XviD
|
||||
releaseGroup: ZeaL
|
||||
|
||||
? Movies/Brazil (1985)/Brazil_Criterion_Edition_(1985).CD2.avi
|
||||
: title: Brazil
|
||||
edition: Criterion Edition
|
||||
year: 1985
|
||||
cdNumber: 2
|
||||
|
||||
? Movies/Persepolis (2007)/[XCT] Persepolis [H264+Aac-128(Fr-Eng)+ST(Fr-Eng)+Ind].mkv
|
||||
: title: Persepolis
|
||||
year: 2007
|
||||
videoCodec: h264
|
||||
audioCodec: AAC
|
||||
language: [ French, English ]
|
||||
subtitleLanguage: [ French, English ]
|
||||
|
||||
? Movies/Toy Story (1995)/Toy Story [HDTV 720p English-Spanish].mkv
|
||||
: title: Toy Story
|
||||
year: 1995
|
||||
format: HDTV
|
||||
screenSize: 720p
|
||||
language: [ english, spanish ]
|
||||
|
||||
? Movies/Office Space (1999)/Office.Space.[Dual-DVDRip].[Spanish-English].[XviD-AC3-AC3].[by.Oswald].avi
|
||||
: title: Office Space
|
||||
year: 1999
|
||||
format: DVD
|
||||
language: [ english, spanish ]
|
||||
videoCodec: XviD
|
||||
audioCodec: AC3
|
||||
|
||||
? Movies/Wild Zero (2000)/Wild.Zero.DVDivX-EPiC.avi
|
||||
: title: Wild Zero
|
||||
year: 2000
|
||||
videoCodec: DivX
|
||||
releaseGroup: EPiC
|
||||
|
||||
? movies/Baraka_Edition_Collector.avi
|
||||
: title: Baraka
|
||||
edition: collector edition
|
||||
|
||||
? Movies/Blade Runner (1982)/Blade.Runner.(1982).(Director's.Cut).CD1.DVDRip.XviD.AC3-WAF.avi
|
||||
: title: Blade Runner
|
||||
year: 1982
|
||||
edition: Director's Cut
|
||||
cdNumber: 1
|
||||
format: DVD
|
||||
videoCodec: XviD
|
||||
audioCodec: AC3
|
||||
releaseGroup: WAF
|
||||
|
||||
? movies/American.The.Bill.Hicks.Story.2009.DVDRip.XviD-EPiSODE.[UsaBit.com]/UsaBit.com_esd-americanbh.avi
|
||||
: title: American The Bill Hicks Story
|
||||
year: 2009
|
||||
format: DVD
|
||||
videoCodec: XviD
|
||||
releaseGroup: EPiSODE
|
||||
website: UsaBit.com
|
||||
|
||||
? movies/Charlie.And.Boots.DVDRip.XviD-TheWretched/wthd-cab.avi
|
||||
: title: Charlie And Boots
|
||||
format: DVD
|
||||
videoCodec: XviD
|
||||
releaseGroup: TheWretched
|
||||
|
||||
? movies/Steig Larsson Millenium Trilogy (2009) BRrip 720 AAC x264/(1)The Girl With The Dragon Tattoo (2009) BRrip 720 AAC x264.mkv
|
||||
: title: The Girl With The Dragon Tattoo
|
||||
filmSeries: Steig Larsson Millenium Trilogy
|
||||
filmNumber: 1
|
||||
year: 2009
|
||||
format: BluRay
|
||||
audioCodec: AAC
|
||||
videoCodec: h264
|
||||
screenSize: 720p
|
||||
|
||||
? movies/Greenberg.REPACK.LiMiTED.DVDRip.XviD-ARROW/arw-repack-greenberg.dvdrip.xvid.avi
|
||||
: title: Greenberg
|
||||
format: DVD
|
||||
videoCodec: XviD
|
||||
releaseGroup: ARROW
|
||||
other: ['Proper', 'Limited']
|
||||
|
||||
? Movies/Fr - Paris 2054, Renaissance (2005) - De Christian Volckman - (Film Divx Science Fiction Fantastique Thriller Policier N&B).avi
|
||||
: title: Paris 2054, Renaissance
|
||||
year: 2005
|
||||
language: french
|
||||
videoCodec: DivX
|
||||
|
||||
? Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi
|
||||
: title: Avida
|
||||
year: 2006
|
||||
language: french
|
||||
format: DVD
|
||||
videoCodec: XviD
|
||||
releaseGroup: PROD
|
||||
|
||||
? Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi
|
||||
: title: Alice in Wonderland
|
||||
format: DVD
|
||||
videoCodec: XviD
|
||||
releaseGroup: DiAMOND
|
||||
|
||||
? Movies/Ne.Le.Dis.A.Personne.Fr 2 cd/personnea_mp.avi
|
||||
: title: Ne Le Dis A Personne
|
||||
language: french
|
||||
cdNumberTotal: 2
|
||||
|
||||
? Movies/Bunker Palace Hôtel (Enki Bilal) (1989)/Enki Bilal - Bunker Palace Hotel (Fr Vhs Rip).avi
|
||||
: title: Bunker Palace Hôtel
|
||||
year: 1989
|
||||
language: french
|
||||
format: VHS
|
||||
|
||||
? Movies/21 (2008)/21.(2008).DVDRip.x264.AC3-FtS.[sharethefiles.com].mkv
|
||||
: title: "21"
|
||||
year: 2008
|
||||
format: DVD
|
||||
videoCodec: h264
|
||||
audioCodec: AC3
|
||||
releaseGroup: FtS
|
||||
website: sharethefiles.com
|
||||
|
||||
? Movies/9 (2009)/9.2009.Blu-ray.DTS.720p.x264.HDBRiSe.[sharethefiles.com].mkv
|
||||
: title: "9"
|
||||
year: 2009
|
||||
format: BluRay
|
||||
audioCodec: DTS
|
||||
screenSize: 720p
|
||||
videoCodec: h264
|
||||
releaseGroup: HDBRiSe
|
||||
website: sharethefiles.com
|
||||
|
||||
? Movies/Mamma.Mia.2008.DVDRip.AC3.XviD-CrazyTeam/Mamma.Mia.2008.DVDRip.AC3.XviD-CrazyTeam.avi
|
||||
: title: Mamma Mia
|
||||
year: 2008
|
||||
format: DVD
|
||||
audioCodec: AC3
|
||||
videoCodec: XviD
|
||||
releaseGroup: CrazyTeam
|
||||
|
||||
? Movies/M.A.S.H. (1970)/MASH.(1970).[Divx.5.02][Dual-Subtitulos][DVDRip].ogm
|
||||
: title: M.A.S.H.
|
||||
year: 1970
|
||||
videoCodec: DivX
|
||||
format: DVD
|
||||
|
||||
? Movies/The Doors (1991)/09.03.08.The.Doors.(1991).BDRip.720p.AC3.X264-HiS@SiLUHD-English.[sharethefiles.com].mkv
|
||||
: title: The Doors
|
||||
year: 1991
|
||||
date: 2008-03-09
|
||||
format: BluRay
|
||||
screenSize: 720p
|
||||
audioCodec: AC3
|
||||
videoCodec: h264
|
||||
releaseGroup: HiS@SiLUHD
|
||||
language: english
|
||||
website: sharethefiles.com
|
||||
|
||||
? Movies/Ratatouille/video_ts-ratatouille.srt
|
||||
: title: Ratatouille
|
||||
format: DVD
|
||||
|
||||
? Movies/001 __ A classer/Fantomas se déchaine - Louis de Funès.avi
|
||||
: title: Fantomas se déchaine
|
||||
|
||||
? Movies/Comme une Image (2004)/Comme.Une.Image.FRENCH.DVDRiP.XViD-NTK.par-www.divx-overnet.com.avi
|
||||
: title: Comme une Image
|
||||
year: 2004
|
||||
language: french
|
||||
format: DVD
|
||||
videoCodec: XviD
|
||||
releaseGroup: NTK
|
||||
website: www.divx-overnet.com
|
||||
|
||||
? Movies/Fantastic Mr Fox/Fantastic.Mr.Fox.2009.DVDRip.{x264+LC-AAC.5.1}{Fr-Eng}{Sub.Fr-Eng}-™.[sharethefiles.com].mkv
|
||||
: title: Fantastic Mr Fox
|
||||
year: 2009
|
||||
format: DVD
|
||||
videoCodec: h264
|
||||
audioCodec: AAC
|
||||
audioProfile: LC
|
||||
audioChannels: "5.1"
|
||||
language: [ french, english ]
|
||||
subtitleLanguage: [ french, english ]
|
||||
website: sharethefiles.com
|
||||
|
||||
? Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi
|
||||
: title: Somewhere
|
||||
year: 2010
|
||||
format: DVD
|
||||
videoCodec: XviD
|
||||
releaseGroup: iLG
|
||||
|
||||
? Movies/Moon_(2009).mkv
|
||||
: title: Moon
|
||||
year: 2009
|
||||
|
||||
? Movies/Moon_(2009)-x01.mkv
|
||||
: title: Moon
|
||||
year: 2009
|
||||
bonusNumber: 1
|
||||
|
||||
? Movies/Moon_(2009)-x02-Making_Of.mkv
|
||||
: title: Moon
|
||||
year: 2009
|
||||
bonusNumber: 2
|
||||
bonusTitle: Making Of
|
||||
|
||||
? movies/James_Bond-f17-Goldeneye.mkv
|
||||
: title: Goldeneye
|
||||
filmSeries: James Bond
|
||||
filmNumber: 17
|
||||
|
||||
? /movies/James_Bond-f21-Casino_Royale.mkv
|
||||
: title: Casino Royale
|
||||
filmSeries: James Bond
|
||||
filmNumber: 21
|
||||
|
||||
? /movies/James_Bond-f21-Casino_Royale-x01-Becoming_Bond.mkv
|
||||
: title: Casino Royale
|
||||
filmSeries: James Bond
|
||||
filmNumber: 21
|
||||
bonusNumber: 1
|
||||
bonusTitle: Becoming Bond
|
||||
|
||||
? /movies/James_Bond-f21-Casino_Royale-x02-Stunts.mkv
|
||||
: title: Casino Royale
|
||||
filmSeries: James Bond
|
||||
filmNumber: 21
|
||||
bonusNumber: 2
|
||||
bonusTitle: Stunts
|
||||
|
||||
? OSS_117--Cairo,_Nest_of_Spies.mkv
|
||||
: title: OSS 117--Cairo, Nest of Spies
|
||||
|
||||
? The Godfather Part III.mkv
|
||||
: title: The Godfather Part III
|
||||
|
||||
? Foobar Part VI.mkv
|
||||
: title: Foobar Part VI
|
||||
|
||||
? The_Insider-(1999)-x02-60_Minutes_Interview-1996.mp4
|
||||
: title: The Insider
|
||||
year: 1999
|
||||
bonusNumber: 2
|
||||
bonusTitle: 60 Minutes Interview-1996
|
||||
|
||||
? Rush.._Beyond_The_Lighted_Stage-x09-Between_Sun_and_Moon-2002_Hartford.mkv
|
||||
: title: Rush Beyond The Lighted Stage
|
||||
bonusNumber: 9
|
||||
bonusTitle: Between Sun and Moon-2002 Hartford
|
||||
|
||||
? /public/uTorrent/Downloads Finished/Movies/Indiana.Jones.and.the.Temple.of.Doom.1984.HDTV.720p.x264.AC3.5.1-REDµX/Indiana.Jones.and.the.Temple.of.Doom.1984.HDTV.720p.x264.AC3.5.1-REDµX.mkv
|
||||
: title: Indiana Jones and the Temple of Doom
|
||||
year: 1984
|
||||
format: HDTV
|
||||
screenSize: 720p
|
||||
videoCodec: h264
|
||||
audioCodec: AC3
|
||||
audioChannels: "5.1"
|
||||
releaseGroup: REDµX
|
||||
|
||||
? The.Director’s.Notebook.2006.Blu-Ray.x264.DXVA.720p.AC3-de[42].mkv
|
||||
: title: The Director’s Notebook
|
||||
year: 2006
|
||||
format: BluRay
|
||||
videoCodec: h264
|
||||
videoApi: DXVA
|
||||
screenSize: 720p
|
||||
audioCodec: AC3
|
||||
releaseGroup: de[42]
|
||||
|
||||
? Movies/Cosmopolis.2012.LiMiTED.720p.BluRay.x264-AN0NYM0US[bb]/ano-cosmo.720p.mkv
|
||||
: title: Cosmopolis
|
||||
year: 2012
|
||||
screenSize: 720p
|
||||
videoCodec: h264
|
||||
releaseGroup: AN0NYM0US[bb]
|
||||
format: BluRay
|
||||
other: LIMITED
|
||||
|
||||
? movies/La Science des Rêves (2006)/La.Science.Des.Reves.FRENCH.DVDRip.XviD-MP-AceBot.avi
|
||||
: title: La Science des Rêves
|
||||
year: 2006
|
||||
format: DVD
|
||||
videoCodec: XviD
|
||||
videoProfile: MP
|
||||
releaseGroup: AceBot
|
||||
language: French
|
||||
|
||||
? The_Italian_Job.mkv
|
||||
: title: The Italian Job
|
||||
|
||||
? The.Rum.Diary.2011.1080p.BluRay.DTS.x264.D-Z0N3.mkv
|
||||
: title: The Rum Diary
|
||||
year: 2011
|
||||
screenSize: 1080p
|
||||
format: BluRay
|
||||
videoCodec: h264
|
||||
audioCodec: DTS
|
||||
releaseGroup: D-Z0N3
|
||||
|
||||
? Life.Of.Pi.2012.1080p.BluRay.DTS.x264.D-Z0N3.mkv
|
||||
: title: Life Of Pi
|
||||
year: 2012
|
||||
screenSize: 1080p
|
||||
format: BluRay
|
||||
videoCodec: h264
|
||||
audioCodec: DTS
|
||||
releaseGroup: D-Z0N3
|
||||
|
||||
? The.Kings.Speech.2010.1080p.BluRay.DTS.x264.D Z0N3.mkv
|
||||
: title: The Kings Speech
|
||||
year: 2010
|
||||
screenSize: 1080p
|
||||
format: BluRay
|
||||
audioCodec: DTS
|
||||
videoCodec: h264
|
||||
releaseGroup: D-Z0N3
|
||||
|
||||
? Street.Kings.2008.BluRay.1080p.DTS.x264.dxva EuReKA.mkv
|
||||
: title: Street Kings
|
||||
year: 2008
|
||||
format: BluRay
|
||||
screenSize: 1080p
|
||||
audioCodec: DTS
|
||||
videoCodec: h264
|
||||
videoApi: DXVA
|
||||
releaseGroup: EuReKa
|
||||
|
||||
? 2001.A.Space.Odyssey.1968.HDDVD.1080p.DTS.x264.dxva EuReKA.mkv
|
||||
: title: 2001 A Space Odyssey
|
||||
year: 1968
|
||||
format: HD-DVD
|
||||
screenSize: 1080p
|
||||
audioCodec: DTS
|
||||
videoCodec: h264
|
||||
videoApi: DXVA
|
||||
releaseGroup: EuReKa
|
||||
|
||||
? 2012.2009.720p.BluRay.x264.DTS WiKi.mkv
|
||||
: title: "2012"
|
||||
year: 2009
|
||||
screenSize: 720p
|
||||
format: BluRay
|
||||
videoCodec: h264
|
||||
audioCodec: DTS
|
||||
releaseGroup: WiKi
|
||||
|
||||
? /share/Download/movie/Dead Man Down (2013) BRRiP XViD DD5_1 Custom NLSubs =-_lt Q_o_Q gt-=_/XD607ebb-BRc59935-5155473f-1c5f49/XD607ebb-BRc59935-5155473f-1c5f49.avi
|
||||
: title: Dead Man Down
|
||||
year: 2013
|
||||
format: BluRay
|
||||
videoCodec: XviD
|
||||
audioChannels: "5.1"
|
||||
audioCodec: DolbyDigital
|
||||
idNumber: XD607ebb-BRc59935-5155473f-1c5f49
|
||||
|
||||
? Pacific.Rim.3D.2013.COMPLETE.BLURAY-PCH.avi
|
||||
: title: Pacific Rim
|
||||
year: 2013
|
||||
format: BluRay
|
||||
other:
|
||||
- complete
|
||||
- 3D
|
||||
releaseGroup: PCH
|
||||
|
||||
? Immersion.French.2011.STV.READNFO.QC.FRENCH.ENGLISH.NTSC.DVDR.nfo
|
||||
: title: Immersion French
|
||||
year: 2011
|
||||
language:
|
||||
- French
|
||||
- English
|
||||
|
||||
? Immersion.French.2011.STV.READNFO.QC.FRENCH.NTSC.DVDR.nfo
|
||||
: title: Immersion French
|
||||
year: 2011
|
||||
language: French
|
||||
|
||||
? Immersion.French.2011.STV.READNFO.QC.NTSC.DVDR.nfo
|
||||
: title: Immersion French
|
||||
year: 2011
|
||||
|
||||
? French.Immersion.2011.STV.READNFO.QC.ENGLISH.NTSC.DVDR.nfo
|
||||
: title: French Immersion
|
||||
year: 2011
|
||||
language: ENGLISH
|
||||
|
||||
? Howl's_Moving_Castle_(2004)_[720p,HDTV,x264,DTS]-FlexGet.avi
|
||||
: videoCodec: h264
|
||||
format: HDTV
|
||||
title: Howl's Moving Castle
|
||||
screenSize: 720p
|
||||
year: 2004
|
||||
audioCodec: DTS
|
||||
releaseGroup: FlexGet
|
||||
|
||||
? Pirates de langkasuka.2008.FRENCH.1920X1080.h264.AVC.AsiaRa.mkv
|
||||
: screenSize: 1080p
|
||||
year: 2008
|
||||
language: French
|
||||
videoCodec: h264
|
||||
title: Pirates de langkasuka
|
||||
releaseGroup: AsiaRa
|
||||
|
||||
? Masala (2013) Telugu Movie HD DVDScr XviD - Exclusive.avi
|
||||
: year: 2013
|
||||
videoCodec: XviD
|
||||
title: Masala
|
||||
format: HD-DVD
|
||||
other: screener
|
||||
language: Telugu
|
||||
releaseGroup: Exclusive
|
||||
|
||||
? Django Unchained 2012 DVDSCR X264 AAC-P2P.nfo
|
||||
: year: 2012
|
||||
other: screener
|
||||
videoCodec: h264
|
||||
title: Django Unchained
|
||||
audioCodec: AAC
|
||||
format: DVD
|
||||
releaseGroup: P2P
|
||||
|
||||
? Ejecutiva.En.Apuros(2009).BLURAY.SCR.Xvid.Spanish.LanzamientosD.nfo
|
||||
: year: 2009
|
||||
other: screener
|
||||
format: BluRay
|
||||
videoCodec: XviD
|
||||
language: Spanish
|
||||
title: Ejecutiva En Apuros
|
||||
|
||||
? Die.Schluempfe.2.German.DL.1080p.BluRay.x264-EXQUiSiTE.mkv
|
||||
: title: Die Schluempfe 2
|
||||
format: BluRay
|
||||
language:
|
||||
- Multiple languages
|
||||
- German
|
||||
videoCodec: h264
|
||||
releaseGroup: EXQUiSiTE
|
||||
screenSize: 1080p
|
||||
|
||||
? Rocky 1976 French SubForced BRRip x264 AC3-FUNKY.mkv
|
||||
: title: Rocky
|
||||
year: 1976
|
||||
subtitleLanguage: French
|
||||
format: BluRay
|
||||
videoCodec: h264
|
||||
audioCodec: AC3
|
||||
releaseGroup: FUNKY
|
||||
|
||||
? REDLINE (BD 1080p H264 10bit FLAC) [3xR].mkv
|
||||
: title: REDLINE
|
||||
format: BluRay
|
||||
videoCodec: h264
|
||||
videoProfile: 10bit
|
||||
audioCodec: Flac
|
||||
screenSize: 1080p
|
||||
|
||||
? The.Lizzie.McGuire.Movie.(2003).HR.DVDRiP.avi
|
||||
: title: The Lizzie McGuire Movie
|
||||
year: 2003
|
||||
screenSize: 480p
|
||||
format: DVD
|
||||
|
||||
? Hua.Mulan.BRRIP.MP4.x264.720p-HR.avi
|
||||
: title: Hua Mulan
|
||||
videoCodec: h264
|
||||
format: BluRay
|
||||
screenSize: 720p
|
||||
|
||||
? Dr.Seuss.The.Lorax.2012.DVDRip.LiNE.XviD.AC3.HQ.Hive-CM8.mp4
|
||||
: videoCodec: XviD
|
||||
title: Dr Seuss The Lorax
|
||||
format: DVD
|
||||
other: LiNE
|
||||
year: 2012
|
||||
audioCodec: AC3
|
||||
audioProfile: HQ
|
||||
releaseGroup: Hive-CM8
|
||||
|
||||
|
||||
? "Star Wars: Episode IV - A New Hope (2004) Special Edition.MKV"
|
||||
: title: Star Wars Episode IV
|
||||
year: 2004
|
||||
edition: Special Edition
|
||||
|
||||
? Dr.LiNE.The.Lorax.2012.DVDRip.LiNE.XviD.AC3.HQ.Hive-CM8.mp4
|
||||
: videoCodec: XviD
|
||||
title: Dr LiNE The Lorax
|
||||
format: DVD
|
||||
other: LiNE
|
||||
year: 2012
|
||||
audioCodec: AC3
|
||||
audioProfile: HQ
|
||||
releaseGroup: Hive-CM8
|
||||
|
||||
? Perfect Child-2007-TRUEFRENCH-TVRip.Xvid-h@mster.avi
|
||||
: releaseGroup: h@mster
|
||||
title: Perfect Child
|
||||
videoCodec: XviD
|
||||
language: French
|
||||
format: TV
|
||||
year: 2007
|
||||
|
||||
? entre.ciel.et.terre.(1994).dvdrip.h264.aac-psypeon.avi
|
||||
: audioCodec: AAC
|
||||
format: DVD
|
||||
releaseGroup: psypeon
|
||||
title: entre ciel et terre
|
||||
videoCodec: h264
|
||||
year: 1994
|
||||
|
||||
? Yves.Saint.Laurent.2013.FRENCH.DVDSCR.MD.XviD-ViVARiUM.avi
|
||||
: format: DVD
|
||||
language: French
|
||||
other: Screener
|
||||
releaseGroup: ViVARiUM
|
||||
title: Yves Saint Laurent
|
||||
videoCodec: XviD
|
||||
year: 2013
|
||||
|
||||
? Echec et Mort - Hard to Kill - Steven Seagal Multi 1080p BluRay x264 CCATS.avi
|
||||
: format: BluRay
|
||||
language: Multiple languages
|
||||
releaseGroup: CCATS
|
||||
screenSize: 1080p
|
||||
title: Echec et Mort
|
||||
videoCodec: h264
|
||||
|
||||
? Paparazzi - Timsit/Lindon (MKV 1080p tvripHD)
|
||||
: options: -n
|
||||
title: Paparazzi
|
||||
screenSize: 1080p
|
||||
format: HDTV
|
||||
|
||||
? some.movie.720p.bluray.x264-mind
|
||||
: options: -n
|
||||
title: some movie
|
||||
screenSize: 720p
|
||||
videoCodec: h264
|
||||
releaseGroup: mind
|
||||
format: BluRay
|
||||
|
||||
? Dr LiNE The Lorax 720p h264 BluRay
|
||||
: options: -n
|
||||
title: Dr LiNE The Lorax
|
||||
screenSize: 720p
|
||||
videoCodec: h264
|
||||
format: BluRay
|
||||
|
||||
? BeatdownFrenchDVDRip.mkv
|
||||
: title: Beatdown
|
||||
language: French
|
||||
format: DVD
|
||||
|
||||
? YvesSaintLaurent2013FrenchDVDScrXvid.avi
|
||||
: format: DVD
|
||||
language: French
|
||||
other: Screener
|
||||
title: Yves saint laurent
|
||||
videoCodec: XviD
|
||||
year: 2013
|
|
@@ -1,473 +0,0 @@
IdSubLanguage ISO639 LanguageName UploadEnabled WebEnabled
|
||||
aar aa Afar, afar 0 0
|
||||
abk ab Abkhazian 0 0
|
||||
ace Achinese 0 0
|
||||
ach Acoli 0 0
|
||||
ada Adangme 0 0
|
||||
ady adyghé 0 0
|
||||
afa Afro-Asiatic (Other) 0 0
|
||||
afh Afrihili 0 0
|
||||
afr af Afrikaans 0 0
|
||||
ain Ainu 0 0
|
||||
aka ak Akan 0 0
|
||||
akk Akkadian 0 0
|
||||
alb sq Albanian 1 1
|
||||
ale Aleut 0 0
|
||||
alg Algonquian languages 0 0
|
||||
alt Southern Altai 0 0
|
||||
amh am Amharic 0 0
|
||||
ang English, Old (ca.450-1100) 0 0
|
||||
apa Apache languages 0 0
|
||||
ara ar Arabic 1 1
|
||||
arc Aramaic 0 0
|
||||
arg an Aragonese 0 0
|
||||
arm hy Armenian 1 0
|
||||
arn Araucanian 0 0
|
||||
arp Arapaho 0 0
|
||||
art Artificial (Other) 0 0
|
||||
arw Arawak 0 0
|
||||
asm as Assamese 0 0
|
||||
ast Asturian, Bable 0 0
|
||||
ath Athapascan languages 0 0
|
||||
aus Australian languages 0 0
|
||||
ava av Avaric 0 0
|
||||
ave ae Avestan 0 0
|
||||
awa Awadhi 0 0
|
||||
aym ay Aymara 0 0
|
||||
aze az Azerbaijani 0 0
|
||||
bad Banda 0 0
|
||||
bai Bamileke languages 0 0
|
||||
bak ba Bashkir 0 0
|
||||
bal Baluchi 0 0
|
||||
bam bm Bambara 0 0
|
||||
ban Balinese 0 0
|
||||
baq eu Basque 1 1
|
||||
bas Basa 0 0
|
||||
bat Baltic (Other) 0 0
|
||||
bej Beja 0 0
|
||||
bel be Belarusian 0 0
|
||||
bem Bemba 0 0
|
||||
ben bn Bengali 1 0
|
||||
ber Berber (Other) 0 0
|
||||
bho Bhojpuri 0 0
|
||||
bih bh Bihari 0 0
|
||||
bik Bikol 0 0
|
||||
bin Bini 0 0
|
||||
bis bi Bislama 0 0
|
||||
bla Siksika 0 0
|
||||
bnt Bantu (Other) 0 0
|
||||
bos bs Bosnian 1 0
|
||||
bra Braj 0 0
|
||||
bre br Breton 1 0
|
||||
btk Batak (Indonesia) 0 0
|
||||
bua Buriat 0 0
|
||||
bug Buginese 0 0
|
||||
bul bg Bulgarian 1 1
|
||||
bur my Burmese 0 0
|
||||
byn Blin 0 0
|
||||
cad Caddo 0 0
|
||||
cai Central American Indian (Other) 0 0
|
||||
car Carib 0 0
|
||||
cat ca Catalan 1 1
|
||||
cau Caucasian (Other) 0 0
|
||||
ceb Cebuano 0 0
|
||||
cel Celtic (Other) 0 0
|
||||
cha ch Chamorro 0 0
|
||||
chb Chibcha 0 0
|
||||
che ce Chechen 0 0
|
||||
chg Chagatai 0 0
|
||||
chi zh Chinese 1 1
|
||||
chk Chuukese 0 0
|
||||
chm Mari 0 0
|
||||
chn Chinook jargon 0 0
|
||||
cho Choctaw 0 0
|
||||
chp Chipewyan 0 0
|
||||
chr Cherokee 0 0
|
||||
chu cu Church Slavic 0 0
|
||||
chv cv Chuvash 0 0
|
||||
chy Cheyenne 0 0
|
||||
cmc Chamic languages 0 0
|
||||
cop Coptic 0 0
|
||||
cor kw Cornish 0 0
|
||||
cos co Corsican 0 0
|
||||
cpe Creoles and pidgins, English based (Other) 0 0
|
||||
cpf Creoles and pidgins, French-based (Other) 0 0
|
||||
cpp Creoles and pidgins, Portuguese-based (Other) 0 0
|
||||
cre cr Cree 0 0
|
||||
crh Crimean Tatar 0 0
|
||||
crp Creoles and pidgins (Other) 0 0
|
||||
csb Kashubian 0 0
|
||||
cus Cushitic (Other)' couchitiques, autres langues 0 0
|
||||
cze cs Czech 1 1
|
||||
dak Dakota 0 0
|
||||
dan da Danish 1 1
|
||||
dar Dargwa 0 0
|
||||
day Dayak 0 0
|
||||
del Delaware 0 0
|
||||
den Slave (Athapascan) 0 0
|
||||
dgr Dogrib 0 0
|
||||
din Dinka 0 0
|
||||
div dv Divehi 0 0
|
||||
doi Dogri 0 0
|
||||
dra Dravidian (Other) 0 0
|
||||
dua Duala 0 0
|
||||
dum Dutch, Middle (ca.1050-1350) 0 0
|
||||
dut nl Dutch 1 1
|
||||
dyu Dyula 0 0
|
||||
dzo dz Dzongkha 0 0
|
||||
efi Efik 0 0
|
||||
egy Egyptian (Ancient) 0 0
|
||||
eka Ekajuk 0 0
|
||||
elx Elamite 0 0
|
||||
eng en English 1 1
|
||||
enm English, Middle (1100-1500) 0 0
|
||||
epo eo Esperanto 1 0
|
||||
est et Estonian 1 1
|
||||
ewe ee Ewe 0 0
|
||||
ewo Ewondo 0 0
|
||||
fan Fang 0 0
|
||||
fao fo Faroese 0 0
|
||||
fat Fanti 0 0
|
||||
fij fj Fijian 0 0
|
||||
fil Filipino 0 0
|
||||
fin fi Finnish 1 1
|
||||
fiu Finno-Ugrian (Other) 0 0
|
||||
fon Fon 0 0
|
||||
fre fr French 1 1
|
||||
frm French, Middle (ca.1400-1600) 0 0
|
||||
fro French, Old (842-ca.1400) 0 0
|
||||
fry fy Frisian 0 0
|
||||
ful ff Fulah 0 0
|
||||
fur Friulian 0 0
|
||||
gaa Ga 0 0
|
||||
gay Gayo 0 0
|
||||
gba Gbaya 0 0
|
||||
gem Germanic (Other) 0 0
|
||||
geo ka Georgian 1 1
|
||||
ger de German 1 1
|
||||
gez Geez 0 0
|
||||
gil Gilbertese 0 0
|
||||
gla gd Gaelic 0 0
|
||||
gle ga Irish 0 0
|
||||
glg gl Galician 1 1
|
||||
glv gv Manx 0 0
|
||||
gmh German, Middle High (ca.1050-1500) 0 0
|
||||
goh German, Old High (ca.750-1050) 0 0
|
||||
gon Gondi 0 0
|
||||
gor Gorontalo 0 0
|
||||
got Gothic 0 0
|
||||
grb Grebo 0 0
|
||||
grc Greek, Ancient (to 1453) 0 0
|
||||
ell el Greek 1 1
|
||||
grn gn Guarani 0 0
|
||||
guj gu Gujarati 0 0
|
||||
gwi Gwich´in 0 0
|
||||
hai Haida 0 0
|
||||
hat ht Haitian 0 0
|
||||
hau ha Hausa 0 0
|
||||
haw Hawaiian 0 0
|
||||
heb he Hebrew 1 1
|
||||
her hz Herero 0 0
|
||||
hil Hiligaynon 0 0
|
||||
him Himachali 0 0
|
||||
hin hi Hindi 1 1
|
||||
hit Hittite 0 0
|
||||
hmn Hmong 0 0
|
||||
hmo ho Hiri Motu 0 0
|
||||
hrv hr Croatian 1 1
|
||||
hun hu Hungarian 1 1
|
||||
hup Hupa 0 0
|
||||
iba Iban 0 0
|
||||
ibo ig Igbo 0 0
|
||||
ice is Icelandic 1 1
|
||||
ido io Ido 0 0
|
||||
iii ii Sichuan Yi 0 0
|
||||
ijo Ijo 0 0
|
||||
iku iu Inuktitut 0 0
|
||||
ile ie Interlingue 0 0
|
||||
ilo Iloko 0 0
|
||||
ina ia Interlingua (International Auxiliary Language Asso 0 0
|
||||
inc Indic (Other) 0 0
|
||||
ind id Indonesian 1 1
|
||||
ine Indo-European (Other) 0 0
|
||||
inh Ingush 0 0
|
||||
ipk ik Inupiaq 0 0
|
||||
ira Iranian (Other) 0 0
|
||||
iro Iroquoian languages 0 0
|
||||
ita it Italian 1 1
|
||||
jav jv Javanese 0 0
|
||||
jpn ja Japanese 1 1
|
||||
jpr Judeo-Persian 0 0
|
||||
jrb Judeo-Arabic 0 0
|
||||
kaa Kara-Kalpak 0 0
|
||||
kab Kabyle 0 0
|
||||
kac Kachin 0 0
|
||||
kal kl Kalaallisut 0 0
|
||||
kam Kamba 0 0
|
||||
kan kn Kannada 0 0
|
||||
kar Karen 0 0
|
||||
kas ks Kashmiri 0 0
|
||||
kau kr Kanuri 0 0
|
||||
kaw Kawi 0 0
|
||||
kaz kk Kazakh 1 0
|
||||
kbd Kabardian 0 0
|
||||
kha Khasi 0 0
|
||||
khi Khoisan (Other) 0 0
|
||||
khm km Khmer 1 1
|
||||
kho Khotanese 0 0
|
||||
kik ki Kikuyu 0 0
|
||||
kin rw Kinyarwanda 0 0
|
||||
kir ky Kirghiz 0 0
|
||||
kmb Kimbundu 0 0
|
||||
kok Konkani 0 0
|
||||
kom kv Komi 0 0
|
||||
kon kg Kongo 0 0
|
||||
kor ko Korean 1 1
|
||||
kos Kosraean 0 0
|
||||
kpe Kpelle 0 0
|
||||
krc Karachay-Balkar 0 0
|
||||
kro Kru 0 0
|
||||
kru Kurukh 0 0
|
||||
kua kj Kuanyama 0 0
|
||||
kum Kumyk 0 0
|
||||
kur ku Kurdish 0 0
|
||||
kut Kutenai 0 0
|
||||
lad Ladino 0 0
|
||||
lah Lahnda 0 0
|
||||
lam Lamba 0 0
|
||||
lao lo Lao 0 0
|
||||
lat la Latin 0 0
|
||||
lav lv Latvian 1 0
|
||||
lez Lezghian 0 0
|
||||
lim li Limburgan 0 0
|
||||
lin ln Lingala 0 0
|
||||
lit lt Lithuanian 1 0
|
||||
lol Mongo 0 0
|
||||
loz Lozi 0 0
ltz lb Luxembourgish 1 0
lua Luba-Lulua 0 0
lub lu Luba-Katanga 0 0
lug lg Ganda 0 0
lui Luiseno 0 0
lun Lunda 0 0
luo Luo (Kenya and Tanzania) 0 0
lus lushai 0 0
mac mk Macedonian 1 1
mad Madurese 0 0
mag Magahi 0 0
mah mh Marshallese 0 0
mai Maithili 0 0
mak Makasar 0 0
mal ml Malayalam 0 0
man Mandingo 0 0
mao mi Maori 0 0
map Austronesian (Other) 0 0
mar mr Marathi 0 0
mas Masai 0 0
may ms Malay 1 1
mdf Moksha 0 0
mdr Mandar 0 0
men Mende 0 0
mga Irish, Middle (900-1200) 0 0
mic Mi'kmaq 0 0
min Minangkabau 0 0
mis Miscellaneous languages 0 0
mkh Mon-Khmer (Other) 0 0
mlg mg Malagasy 0 0
mlt mt Maltese 0 0
mnc Manchu 0 0
mni Manipuri 0 0
mno Manobo languages 0 0
moh Mohawk 0 0
mol mo Moldavian 0 0
mon mn Mongolian 1 0
mos Mossi 0 0
mwl Mirandese 0 0
mul Multiple languages 0 0
mun Munda languages 0 0
mus Creek 0 0
mwr Marwari 0 0
myn Mayan languages 0 0
myv Erzya 0 0
nah Nahuatl 0 0
nai North American Indian 0 0
nap Neapolitan 0 0
nau na Nauru 0 0
nav nv Navajo 0 0
nbl nr Ndebele, South 0 0
nde nd Ndebele, North 0 0
ndo ng Ndonga 0 0
nds Low German 0 0
nep ne Nepali 0 0
new Nepal Bhasa 0 0
nia Nias 0 0
nic Niger-Kordofanian (Other) 0 0
niu Niuean 0 0
nno nn Norwegian Nynorsk 0 0
nob nb Norwegian Bokmal 0 0
nog Nogai 0 0
non Norse, Old 0 0
nor no Norwegian 1 1
nso Northern Sotho 0 0
nub Nubian languages 0 0
nwc Classical Newari 0 0
nya ny Chichewa 0 0
nym Nyamwezi 0 0
nyn Nyankole 0 0
nyo Nyoro 0 0
nzi Nzima 0 0
oci oc Occitan 1 1
oji oj Ojibwa 0 0
ori or Oriya 0 0
orm om Oromo 0 0
osa Osage 0 0
oss os Ossetian 0 0
ota Turkish, Ottoman (1500-1928) 0 0
oto Otomian languages 0 0
paa Papuan (Other) 0 0
pag Pangasinan 0 0
pal Pahlavi 0 0
pam Pampanga 0 0
pan pa Panjabi 0 0
pap Papiamento 0 0
pau Palauan 0 0
peo Persian, Old (ca.600-400 B.C.) 0 0
per fa Persian 1 1
phi Philippine (Other) 0 0
phn Phoenician 0 0
pli pi Pali 0 0
pol pl Polish 1 1
pon Pohnpeian 0 0
por pt Portuguese 1 1
pra Prakrit languages 0 0
pro Provençal, Old (to 1500) 0 0
pus ps Pushto 0 0
que qu Quechua 0 0
raj Rajasthani 0 0
rap Rapanui 0 0
rar Rarotongan 0 0
roa Romance (Other) 0 0
roh rm Raeto-Romance 0 0
rom Romany 0 0
run rn Rundi 0 0
rup Aromanian 0 0
rus ru Russian 1 1
sad Sandawe 0 0
sag sg Sango 0 0
sah Yakut 0 0
sai South American Indian (Other) 0 0
sal Salishan languages 0 0
sam Samaritan Aramaic 0 0
san sa Sanskrit 0 0
sas Sasak 0 0
sat Santali 0 0
scc sr Serbian 1 1
scn Sicilian 0 0
sco Scots 0 0
sel Selkup 0 0
sem Semitic (Other) 0 0
sga Irish, Old (to 900) 0 0
sgn Sign Languages 0 0
shn Shan 0 0
sid Sidamo 0 0
sin si Sinhalese 1 1
sio Siouan languages 0 0
sit Sino-Tibetan (Other) 0 0
sla Slavic (Other) 0 0
slo sk Slovak 1 1
slv sl Slovenian 1 1
sma Southern Sami 0 0
sme se Northern Sami 0 0
smi Sami languages (Other) 0 0
smj Lule Sami 0 0
smn Inari Sami 0 0
smo sm Samoan 0 0
sms Skolt Sami 0 0
sna sn Shona 0 0
snd sd Sindhi 0 0
snk Soninke 0 0
sog Sogdian 0 0
som so Somali 0 0
son Songhai 0 0
sot st Sotho, Southern 0 0
spa es Spanish 1 1
srd sc Sardinian 0 0
srr Serer 0 0
ssa Nilo-Saharan (Other) 0 0
ssw ss Swati 0 0
suk Sukuma 0 0
sun su Sundanese 0 0
sus Susu 0 0
sux Sumerian 0 0
swa sw Swahili 1 0
swe sv Swedish 1 1
syr Syriac 1 0
tah ty Tahitian 0 0
tai Tai (Other) 0 0
tam ta Tamil 0 0
tat tt Tatar 0 0
tel te Telugu 0 0
tem Timne 0 0
ter Tereno 0 0
tet Tetum 0 0
tgk tg Tajik 0 0
tgl tl Tagalog 1 1
tha th Thai 1 1
tib bo Tibetan 0 0
tig Tigre 0 0
tir ti Tigrinya 0 0
tiv Tiv 0 0
tkl Tokelau 0 0
tlh Klingon 0 0
tli Tlingit 0 0
tmh Tamashek 0 0
tog Tonga (Nyasa) 0 0
ton to Tonga (Tonga Islands) 0 0
tpi Tok Pisin 0 0
tsi Tsimshian 0 0
tsn tn Tswana 0 0
tso ts Tsonga 0 0
tuk tk Turkmen 0 0
tum Tumbuka 0 0
tup Tupi languages 0 0
tur tr Turkish 1 1
tut Altaic (Other) 0 0
tvl Tuvalu 0 0
twi tw Twi 0 0
tyv Tuvinian 0 0
udm Udmurt 0 0
uga Ugaritic 0 0
uig ug Uighur 0 0
ukr uk Ukrainian 1 1
umb Umbundu 0 0
und Undetermined 0 0
urd ur Urdu 1 0
uzb uz Uzbek 0 0
vai Vai 0 0
ven ve Venda 0 0
vie vi Vietnamese 1 1
vol vo Volapük 0 0
vot Votic 0 0
wak Wakashan languages 0 0
wal Walamo 0 0
war Waray 0 0
was Washo 0 0
wel cy Welsh 0 0
wen Sorbian languages 0 0
wln wa Walloon 0 0
wol wo Wolof 0 0
xal Kalmyk 0 0
xho xh Xhosa 0 0
yao Yao 0 0
yap Yapese 0 0
yid yi Yiddish 0 0
yor yo Yoruba 0 0
ypk Yupik languages 0 0
zap Zapotec 0 0
zen Zenaga 0 0
zha za Zhuang 0 0
znd Zande 0 0
zul zu Zulu 0 0
zun Zuni 0 0
rum ro Romanian 1 1
pob pb Brazilian 1 1
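The rows above are the OpenSubtitles language table that the language tests later in this diff read back in: five tab-separated columns (OpenSubtitles id, ISO 639-1 code when one exists, English name, upload-enabled flag, web-enabled flag). As an illustration only, not part of the commit, a minimal sketch of parsing one such row the same way test_opensubtitles below splits it:

# Hypothetical helper, not part of guessit: parse one tab-separated row of the
# table above into its five columns, mirroring the split() in test_opensubtitles.
def parse_language_row(line):
    idlang, alpha2, name, upload_enabled, web_enabled = line.rstrip('\n').split('\t')
    return {
        'id': idlang,
        'alpha2': alpha2 or None,            # empty when no 2-letter code exists
        'name': name,
        'upload_enabled': upload_enabled == '1',
        'web_enabled': web_enabled == '1',
    }

print(parse_language_row('rus\tru\tRussian\t1\t1'))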
@@ -1,54 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2014 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.test.guessittest import *


class TestApi(TestGuessit):
    def test_api(self):
        movie_path = 'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv'

        movie_info = guessit.guess_movie_info(movie_path)
        video_info = guessit.guess_video_info(movie_path)
        episode_info = guessit.guess_episode_info(movie_path)
        file_info = guessit.guess_file_info(movie_path)

        self.assertEqual(guessit.guess_file_info(movie_path, type='movie'), movie_info)
        self.assertEqual(guessit.guess_file_info(movie_path, type='video'), video_info)
        self.assertEqual(guessit.guess_file_info(movie_path, type='episode'), episode_info)

        self.assertEqual(guessit.guess_file_info(movie_path, options={'type': 'movie'}), movie_info)
        self.assertEqual(guessit.guess_file_info(movie_path, options={'type': 'video'}), video_info)
        self.assertEqual(guessit.guess_file_info(movie_path, options={'type': 'episode'}), episode_info)

        self.assertEqual(guessit.guess_file_info(movie_path, options={'type': 'episode'}, type='movie'), episode_info)  # kwargs priority other options

        movie_path_name_only = 'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD'
        file_info_name_only = guessit.guess_file_info(movie_path_name_only, options={"name_only": True})

        self.assertFalse('container' in file_info_name_only)
        self.assertTrue('container' in file_info)

suite = allTests(TestApi)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
@ -1,45 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit.test.guessittest import *
|
||||
|
||||
|
||||
class TestAutoDetect(TestGuessit):
|
||||
def testEmpty(self):
|
||||
result = guessit.guess_file_info('')
|
||||
self.assertEqual(result, {})
|
||||
|
||||
result = guessit.guess_file_info('___-__')
|
||||
self.assertEqual(result, {})
|
||||
|
||||
result = guessit.guess_file_info('__-.avc')
|
||||
self.assertEqual(result, {'type': 'unknown', 'extension': 'avc'})
|
||||
|
||||
def testAutoDetect(self):
|
||||
self.checkMinimumFieldsCorrect(filename='autodetect.yaml',
|
||||
remove_type=False)
|
||||
|
||||
|
||||
suite = allTests(TestAutoDetect)
|
||||
|
||||
if __name__ == '__main__':
|
||||
TextTestRunner(verbosity=2).run(suite)
|
|
@ -1,46 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit.test.guessittest import *
|
||||
|
||||
IGNORE_EPISODES = []
|
||||
IGNORE_MOVIES = []
|
||||
|
||||
|
||||
class TestAutoDetectAll(TestGuessit):
|
||||
def testAutoMatcher(self):
|
||||
self.checkMinimumFieldsCorrect(filename='autodetect.yaml',
|
||||
remove_type=False)
|
||||
|
||||
def testAutoMatcherMovies(self):
|
||||
self.checkMinimumFieldsCorrect(filename='movies.yaml',
|
||||
exclude_files=IGNORE_MOVIES)
|
||||
|
||||
def testAutoMatcherEpisodes(self):
|
||||
self.checkMinimumFieldsCorrect(filename='episodes.yaml',
|
||||
exclude_files=IGNORE_EPISODES)
|
||||
|
||||
|
||||
suite = allTests(TestAutoDetectAll)
|
||||
|
||||
if __name__ == '__main__':
|
||||
TextTestRunner(verbosity=2).run(suite)
|
|
@ -1,45 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2014 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit.test.guessittest import *
|
||||
import guessit
|
||||
import guessit.hash_ed2k
|
||||
import unittest
|
||||
import doctest
|
||||
|
||||
|
||||
def load_tests(loader, tests, ignore):
|
||||
tests.addTests(doctest.DocTestSuite(guessit))
|
||||
tests.addTests(doctest.DocTestSuite(guessit.date))
|
||||
tests.addTests(doctest.DocTestSuite(guessit.fileutils))
|
||||
tests.addTests(doctest.DocTestSuite(guessit.guess))
|
||||
tests.addTests(doctest.DocTestSuite(guessit.hash_ed2k))
|
||||
tests.addTests(doctest.DocTestSuite(guessit.language))
|
||||
tests.addTests(doctest.DocTestSuite(guessit.matchtree))
|
||||
tests.addTests(doctest.DocTestSuite(guessit.textutils))
|
||||
return tests
|
||||
|
||||
suite = unittest.TestSuite()
|
||||
load_tests(None, suite, None)
|
||||
|
||||
if __name__ == '__main__':
|
||||
TextTestRunner(verbosity=2).run(suite)
|
|
@@ -1,35 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.test.guessittest import *


class TestEpisode(TestGuessit):
    def testEpisodes(self):
        self.checkMinimumFieldsCorrect(filetype='episode',
                                       filename='episodes.yaml')


suite = allTests(TestEpisode)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
@ -1,46 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit.test.guessittest import *
|
||||
|
||||
|
||||
class TestHashes(TestGuessit):
|
||||
def test_hashes(self):
|
||||
hashes = (
|
||||
('hash_mpc', '1MB', u'8542ad406c15c8bd'), # TODO: Check if this value is valid
|
||||
('hash_ed2k', '1MB', u'ed2k://|file|1MB|1048576|AA3CC5552A9931A76B61A41D306735F7|/'), # TODO: Check if this value is valid
|
||||
('hash_md5', '1MB', u'5d8dcbca8d8ac21766f28797d6c3954c'),
|
||||
('hash_sha1', '1MB', u'51d2b8f3248d7ee495b7750c8da5aa3b3819de9d'),
|
||||
('hash_md5', 'dummy.srt', u'64de6b5893cac24456c46a935ef9c359'),
|
||||
('hash_sha1', 'dummy.srt', u'a703fc0fa4518080505809bf562c6fc6f7b3c98c')
|
||||
)
|
||||
|
||||
for hash_type, filename, expected_value in hashes:
|
||||
guess = guess_file_info(file_in_same_dir(__file__, filename), hash_type)
|
||||
computed_value = guess.get(hash_type)
|
||||
self.assertEqual(expected_value, guess.get(hash_type), "Invalid %s for %s: %s != %s" % (hash_type, filename, computed_value, expected_value))
|
||||
|
||||
|
||||
suite = allTests(TestHashes)
|
||||
|
||||
if __name__ == '__main__':
|
||||
TextTestRunner(verbosity=2).run(suite)
|
|
@ -1,138 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit.test.guessittest import *
|
||||
|
||||
import io
|
||||
|
||||
|
||||
class TestLanguage(TestGuessit):
|
||||
|
||||
def check_languages(self, languages):
|
||||
for lang1, lang2 in languages.items():
|
||||
self.assertEqual(Language(lang1),
|
||||
Language(lang2))
|
||||
|
||||
def test_addic7ed(self):
|
||||
languages = {'English': 'en',
|
||||
'English (US)': 'en',
|
||||
'English (UK)': 'en',
|
||||
'Italian': 'it',
|
||||
'Portuguese': 'pt',
|
||||
'Portuguese (Brazilian)': 'pt',
|
||||
'Romanian': 'ro',
|
||||
'Español (Latinoamérica)': 'es',
|
||||
'Español (España)': 'es',
|
||||
'Spanish (Latin America)': 'es',
|
||||
'Español': 'es',
|
||||
'Spanish': 'es',
|
||||
'Spanish (Spain)': 'es',
|
||||
'French': 'fr',
|
||||
'Greek': 'el',
|
||||
'Arabic': 'ar',
|
||||
'German': 'de',
|
||||
'Croatian': 'hr',
|
||||
'Indonesian': 'id',
|
||||
'Hebrew': 'he',
|
||||
'Russian': 'ru',
|
||||
'Turkish': 'tr',
|
||||
'Swedish': 'se',
|
||||
'Czech': 'cs',
|
||||
'Dutch': 'nl',
|
||||
'Hungarian': 'hu',
|
||||
'Norwegian': 'no',
|
||||
'Polish': 'pl',
|
||||
'Persian': 'fa'}
|
||||
|
||||
self.check_languages(languages)
|
||||
|
||||
def test_subswiki(self):
|
||||
languages = {'English (US)': 'en', 'English (UK)': 'en', 'English': 'en',
|
||||
'French': 'fr', 'Brazilian': 'po', 'Portuguese': 'pt',
|
||||
'Español (Latinoamérica)': 'es', 'Español (España)': 'es',
|
||||
'Español': 'es', 'Italian': 'it', 'Català': 'ca'}
|
||||
|
||||
self.check_languages(languages)
|
||||
|
||||
def test_tvsubtitles(self):
|
||||
languages = {'English': 'en', 'Español': 'es', 'French': 'fr', 'German': 'de',
|
||||
'Brazilian': 'br', 'Russian': 'ru', 'Ukrainian': 'ua', 'Italian': 'it',
|
||||
'Greek': 'gr', 'Arabic': 'ar', 'Hungarian': 'hu', 'Polish': 'pl',
|
||||
'Turkish': 'tr', 'Dutch': 'nl', 'Portuguese': 'pt', 'Swedish': 'sv',
|
||||
'Danish': 'da', 'Finnish': 'fi', 'Korean': 'ko', 'Chinese': 'cn',
|
||||
'Japanese': 'jp', 'Bulgarian': 'bg', 'Czech': 'cz', 'Romanian': 'ro'}
|
||||
|
||||
self.check_languages(languages)
|
||||
|
||||
def test_opensubtitles(self):
|
||||
opensubtitles_langfile = file_in_same_dir(__file__, 'opensubtitles_languages_2012_05_09.txt')
|
||||
for l in [u(l).strip() for l in io.open(opensubtitles_langfile, encoding='utf-8')][1:]:
|
||||
idlang, alpha2, _, upload_enabled, web_enabled = l.strip().split('\t')
|
||||
# do not test languages that are too esoteric / not widely available
|
||||
if int(upload_enabled) and int(web_enabled):
|
||||
# check that we recognize the opensubtitles language code correctly
|
||||
# and that we are able to output this code from a language
|
||||
self.assertEqual(idlang, Language(idlang).opensubtitles)
|
||||
if alpha2:
|
||||
# check we recognize the opensubtitles 2-letter code correctly
|
||||
self.check_languages({idlang: alpha2})
|
||||
|
||||
def test_tmdb(self):
|
||||
# examples from http://api.themoviedb.org/2.1/language-tags
|
||||
for lang in ['en-US', 'en-CA', 'es-MX', 'fr-PF']:
|
||||
self.assertEqual(lang, Language(lang).tmdb)
|
||||
|
||||
def test_subtitulos(self):
|
||||
languages = {'English (US)': 'en', 'English (UK)': 'en', 'English': 'en',
|
||||
'French': 'fr', 'Brazilian': 'po', 'Portuguese': 'pt',
|
||||
'Español (Latinoamérica)': 'es', 'Español (España)': 'es',
|
||||
'Español': 'es', 'Italian': 'it', 'Català': 'ca'}
|
||||
|
||||
self.check_languages(languages)
|
||||
|
||||
def test_thesubdb(self):
|
||||
languages = {'af': 'af', 'cs': 'cs', 'da': 'da', 'de': 'de', 'en': 'en', 'es': 'es', 'fi': 'fi',
|
||||
'fr': 'fr', 'hu': 'hu', 'id': 'id', 'it': 'it', 'la': 'la', 'nl': 'nl', 'no': 'no',
|
||||
'oc': 'oc', 'pl': 'pl', 'pt': 'pt', 'ro': 'ro', 'ru': 'ru', 'sl': 'sl', 'sr': 'sr',
|
||||
'sv': 'sv', 'tr': 'tr'}
|
||||
|
||||
self.check_languages(languages)
|
||||
|
||||
def test_language_object(self):
|
||||
self.assertEqual(len(list(set([Language('qwerty'), Language('asdf')]))), 1)
|
||||
d = {Language('qwerty'): 7}
|
||||
d[Language('asdf')] = 23
|
||||
self.assertEqual(d[Language('qwerty')], 23)
|
||||
|
||||
def test_exceptions(self):
|
||||
self.assertEqual(Language('br'), Language('pt(br)'))
|
||||
|
||||
# languages should be equal regardless of country
|
||||
self.assertEqual(Language('br'), Language('pt'))
|
||||
|
||||
self.assertEqual(Language('unknown'), Language('und'))
|
||||
|
||||
|
||||
suite = allTests(TestLanguage)
|
||||
|
||||
if __name__ == '__main__':
|
||||
TextTestRunner(verbosity=2).run(suite)
|
|
@ -1,70 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2014 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit.test.guessittest import *
|
||||
from guessit.fileutils import split_path, file_in_same_dir
|
||||
from guessit.textutils import strip_brackets, str_replace, str_fill
|
||||
from guessit import PY2
|
||||
from guessit import __main__
|
||||
|
||||
if PY2:
|
||||
from StringIO import StringIO
|
||||
else:
|
||||
from io import StringIO
|
||||
|
||||
|
||||
class TestMain(TestGuessit):
|
||||
def setUp(self):
|
||||
self._stdout = sys.stdout
|
||||
string_out = StringIO()
|
||||
sys.stdout = string_out
|
||||
|
||||
def tearDown(self):
|
||||
sys.stdout = self._stdout
|
||||
|
||||
def test_list_properties(self):
|
||||
__main__.main(["-p"], False)
|
||||
__main__.main(["-l"], False)
|
||||
|
||||
def test_list_transformers(self):
|
||||
__main__.main(["--transformers"], False)
|
||||
__main__.main(["-l", "--transformers"], False)
|
||||
|
||||
def test_demo(self):
|
||||
__main__.main(["-d"], False)
|
||||
__main__.main(["-l"], False)
|
||||
|
||||
def test_filename(self):
|
||||
__main__.main(["A.Movie.2014.avi"], False)
|
||||
__main__.main(["A.Movie.2014.avi", "A.2nd.Movie.2014.avi"], False)
|
||||
__main__.main(["-y", "A.Movie.2014.avi"], False)
|
||||
__main__.main(["-a", "A.Movie.2014.avi"], False)
|
||||
__main__.main(["-v", "A.Movie.2014.avi"], False)
|
||||
__main__.main(["-t", "movie", "A.Movie.2014.avi"], False)
|
||||
__main__.main(["-t", "episode", "A.Serie.S02E06.avi"], False)
|
||||
__main__.main(["-i", "hash_mpc", file_in_same_dir(__file__, "1MB")], False)
|
||||
__main__.main(["-i", "hash_md5", file_in_same_dir(__file__, "1MB")], False)
|
||||
|
||||
suite = allTests(TestMain)
|
||||
|
||||
if __name__ == '__main__':
|
||||
TextTestRunner(verbosity=2).run(suite)
|
|
@ -1,93 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit.test.guessittest import *
|
||||
|
||||
from guessit.transfo.guess_release_group import GuessReleaseGroup
|
||||
from guessit.transfo.guess_properties import GuessProperties
|
||||
from guessit.matchtree import BaseMatchTree
|
||||
|
||||
keywords = yaml.load("""
|
||||
|
||||
? Xvid PROPER
|
||||
: videoCodec: Xvid
|
||||
other: PROPER
|
||||
|
||||
? PROPER-Xvid
|
||||
: videoCodec: Xvid
|
||||
other: PROPER
|
||||
|
||||
""")
|
||||
|
||||
|
||||
def guess_info(string, options=None):
|
||||
mtree = MatchTree(string)
|
||||
GuessReleaseGroup().process(mtree, options)
|
||||
GuessProperties().process(mtree, options)
|
||||
return mtree.matched()
|
||||
|
||||
|
||||
class TestMatchTree(TestGuessit):
|
||||
def test_base_tree(self):
|
||||
t = BaseMatchTree('One Two Three(Three) Four')
|
||||
t.partition((3, 7, 20))
|
||||
leaves = t.leaves()
|
||||
|
||||
self.assertEqual(leaves[0].span, (0, 3))
|
||||
|
||||
self.assertEqual('One', leaves[0].value)
|
||||
self.assertEqual(' Two', leaves[1].value)
|
||||
self.assertEqual(' Three(Three)', leaves[2].value)
|
||||
self.assertEqual(' Four', leaves[3].value)
|
||||
|
||||
leaves[2].partition((1, 6, 7, 12))
|
||||
three_leaves = leaves[2].leaves()
|
||||
|
||||
self.assertEqual('Three', three_leaves[1].value)
|
||||
self.assertEqual('Three', three_leaves[3].value)
|
||||
|
||||
leaves = t.leaves()
|
||||
|
||||
self.assertEqual(len(leaves), 8)
|
||||
|
||||
self.assertEqual(leaves[5], three_leaves[3])
|
||||
|
||||
self.assertEqual(t.previous_leaf(leaves[5]), leaves[4])
|
||||
self.assertEqual(t.next_leaf(leaves[5]), leaves[6])
|
||||
|
||||
self.assertEqual(t.next_leaves(leaves[5]), [leaves[6], leaves[7]])
|
||||
self.assertEqual(t.previous_leaves(leaves[5]), [leaves[4], leaves[3], leaves[2], leaves[1], leaves[0]])
|
||||
|
||||
self.assertEqual(t.next_leaf(leaves[7]), None)
|
||||
self.assertEqual(t.previous_leaf(leaves[0]), None)
|
||||
|
||||
self.assertEqual(t.next_leaves(leaves[7]), [])
|
||||
self.assertEqual(t.previous_leaves(leaves[0]), [])
|
||||
|
||||
def test_match(self):
|
||||
self.checkFields(keywords, guess_info)
|
||||
|
||||
|
||||
suite = allTests(TestMatchTree)
|
||||
|
||||
if __name__ == '__main__':
|
||||
TextTestRunner(verbosity=2).run(suite)
|
|
@ -1,35 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit.test.guessittest import *
|
||||
|
||||
|
||||
class TestMovie(TestGuessit):
|
||||
def testMovies(self):
|
||||
self.checkMinimumFieldsCorrect(filetype='movie',
|
||||
filename='movies.yaml')
|
||||
|
||||
|
||||
suite = allTests(TestMovie)
|
||||
|
||||
if __name__ == '__main__':
|
||||
TextTestRunner(verbosity=2).run(suite)
|
|
@ -1,126 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit.quality import best_quality, best_quality_properties
|
||||
from guessit.containers import QualitiesContainer
|
||||
from guessit.test.guessittest import *
|
||||
|
||||
|
||||
class TestQuality(TestGuessit):
|
||||
def test_container(self):
|
||||
container = QualitiesContainer()
|
||||
|
||||
container.register_quality('color', 'red', 10)
|
||||
container.register_quality('color', 'orange', 20)
|
||||
container.register_quality('color', 'green', 30)
|
||||
|
||||
container.register_quality('context', 'sun', 100)
|
||||
container.register_quality('context', 'sea', 200)
|
||||
container.register_quality('context', 'sex', 300)
|
||||
|
||||
g1 = Guess()
|
||||
g1['color'] = 'red'
|
||||
|
||||
g2 = Guess()
|
||||
g2['color'] = 'green'
|
||||
|
||||
g3 = Guess()
|
||||
g3['color'] = 'orange'
|
||||
|
||||
q3 = container.rate_quality(g3)
|
||||
self.assertEqual(q3, 20, "ORANGE should be rated 20. Don't ask why!")
|
||||
|
||||
q1 = container.rate_quality(g1)
|
||||
q2 = container.rate_quality(g2)
|
||||
|
||||
self.assertTrue(q2 > q1, "GREEN should be greater than RED. Don't ask why!")
|
||||
|
||||
g1['context'] = 'sex'
|
||||
g2['context'] = 'sun'
|
||||
|
||||
q1 = container.rate_quality(g1)
|
||||
q2 = container.rate_quality(g2)
|
||||
|
||||
self.assertTrue(q1 > q2, "SEX should be greater than SUN. Don't ask why!")
|
||||
|
||||
self.assertEqual(container.best_quality(g1, g2), g1, "RED&SEX should be better than GREEN&SUN. Don't ask why!")
|
||||
|
||||
self.assertEqual(container.best_quality_properties(['color'], g1, g2), g2, "GREEN should be better than RED. Don't ask why!")
|
||||
|
||||
self.assertEqual(container.best_quality_properties(['context'], g1, g2), g1, "SEX should be better than SUN. Don't ask why!")
|
||||
|
||||
q1 = container.rate_quality(g1, 'color')
|
||||
q2 = container.rate_quality(g2, 'color')
|
||||
|
||||
self.assertTrue(q2 > q1, "GREEN should be greater than RED. Don't ask why!")
|
||||
|
||||
container.unregister_quality('context', 'sex')
|
||||
container.unregister_quality('context', 'sun')
|
||||
|
||||
q1 = container.rate_quality(g1)
|
||||
q2 = container.rate_quality(g2)
|
||||
|
||||
self.assertTrue(q2 > q1, "GREEN&SUN should be greater than RED&SEX. Don't ask why!")
|
||||
|
||||
g3['context'] = 'sea'
|
||||
container.unregister_quality('context', 'sea')
|
||||
|
||||
q3 = container.rate_quality(g3, 'context')
|
||||
self.assertEqual(q3, 0, "Context should be unregistered.")
|
||||
|
||||
container.unregister_quality('color')
|
||||
q3 = container.rate_quality(g3, 'color')
|
||||
|
||||
self.assertEqual(q3, 0, "Color should be unregistered.")
|
||||
|
||||
container.clear_qualities()
|
||||
|
||||
q1 = container.rate_quality(g1)
|
||||
q2 = container.rate_quality(g2)
|
||||
|
||||
self.assertTrue(q1 == q2 == 0, "Empty quality container should rate each guess to 0")
|
||||
|
||||
def test_quality_transformers(self):
|
||||
guess_720p = guessit.guess_file_info("2012.2009.720p.BluRay.x264.DTS WiKi.mkv")
|
||||
guess_1080p = guessit.guess_file_info("2012.2009.1080p.BluRay.x264.MP3 WiKi.mkv")
|
||||
|
||||
self.assertTrue('audioCodec' in guess_720p, "audioCodec should be present")
|
||||
self.assertTrue('audioCodec' in guess_1080p, "audioCodec should be present")
|
||||
self.assertTrue('screenSize' in guess_720p, "screenSize should be present")
|
||||
self.assertTrue('screenSize' in guess_1080p, "screenSize should be present")
|
||||
|
||||
best_quality_guess = best_quality(guess_720p, guess_1080p)
|
||||
|
||||
self.assertTrue(guess_1080p == best_quality_guess, "1080p+MP3 is not the best global quality")
|
||||
|
||||
best_quality_guess = best_quality_properties(['screenSize'], guess_720p, guess_1080p)
|
||||
|
||||
self.assertTrue(guess_1080p == best_quality_guess, "1080p is not the best screenSize")
|
||||
|
||||
best_quality_guess = best_quality_properties(['audioCodec'], guess_720p, guess_1080p)
|
||||
|
||||
self.assertTrue(guess_720p == best_quality_guess, "DTS is not the best audioCodec")
|
||||
|
||||
suite = allTests(TestQuality)
|
||||
|
||||
if __name__ == '__main__':
|
||||
TextTestRunner(verbosity=2).run(suite)
|
|
@ -1,155 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit.test.guessittest import *
|
||||
from guessit.fileutils import split_path
|
||||
from guessit.textutils import strip_brackets, str_replace, str_fill, from_camel, is_camel,\
|
||||
levenshtein, reorder_title
|
||||
from guessit import PY2
|
||||
from guessit.date import search_date, search_year
|
||||
from datetime import datetime, date, timedelta
|
||||
|
||||
|
||||
class TestUtils(TestGuessit):
|
||||
def test_splitpath(self):
|
||||
alltests = {False: {'/usr/bin/smewt': ['/', 'usr', 'bin', 'smewt'],
|
||||
'relative_path/to/my_folder/': ['relative_path', 'to', 'my_folder'],
|
||||
'//some/path': ['//', 'some', 'path'],
|
||||
'//some//path': ['//', 'some', 'path'],
|
||||
'///some////path': ['///', 'some', 'path']
|
||||
|
||||
},
|
||||
True: {'C:\\Program Files\\Smewt\\smewt.exe': ['C:\\', 'Program Files', 'Smewt', 'smewt.exe'],
|
||||
'Documents and Settings\\User\\config': ['Documents and Settings', 'User', 'config'],
|
||||
'C:\\Documents and Settings\\User\\config': ['C:\\', 'Documents and Settings', 'User', 'config'],
|
||||
# http://bugs.python.org/issue19945
|
||||
'\\\\netdrive\\share': ['\\\\', 'netdrive', 'share'] if PY2 else ['\\\\netdrive\\share'],
|
||||
'\\\\netdrive\\share\\folder': ['\\\\', 'netdrive', 'share', 'folder'] if PY2 else ['\\\\netdrive\\share\\', 'folder'],
|
||||
}
|
||||
}
|
||||
tests = alltests[sys.platform == 'win32']
|
||||
for path, split in tests.items():
|
||||
self.assertEqual(split, split_path(path))
|
||||
|
||||
def test_strip_brackets(self):
|
||||
allTests = (('', ''),
|
||||
('[test]', 'test'),
|
||||
('{test2}', 'test2'),
|
||||
('(test3)', 'test3'),
|
||||
('(test4]', '(test4]'),
|
||||
)
|
||||
|
||||
for i, e in allTests:
|
||||
self.assertEqual(e, strip_brackets(i))
|
||||
|
||||
def test_levenshtein(self):
|
||||
self.assertEqual(levenshtein("abcdef ghijk lmno", "abcdef ghijk lmno"), 0)
|
||||
self.assertEqual(levenshtein("abcdef ghijk lmnop", "abcdef ghijk lmno"), 1)
|
||||
self.assertEqual(levenshtein("abcdef ghijk lmno", "abcdef ghijk lmn"), 1)
|
||||
self.assertEqual(levenshtein("abcdef ghijk lmno", "abcdef ghijk lmnp"), 1)
|
||||
self.assertEqual(levenshtein("abcdef ghijk lmno", "abcdef ghijk lmnq"), 1)
|
||||
self.assertEqual(levenshtein("cbcdef ghijk lmno", "abcdef ghijk lmnq"), 2)
|
||||
self.assertEqual(levenshtein("cbcdef ghihk lmno", "abcdef ghijk lmnq"), 3)
|
||||
|
||||
def test_reorder_title(self):
|
||||
self.assertEqual(reorder_title("Simpsons, The"), "The Simpsons")
|
||||
self.assertEqual(reorder_title("Simpsons,The"), "The Simpsons")
|
||||
self.assertEqual(reorder_title("Simpsons,Les", articles=('the', 'le', 'la', 'les')), "Les Simpsons")
|
||||
self.assertEqual(reorder_title("Simpsons, Les", articles=('the', 'le', 'la', 'les')), "Les Simpsons")
|
||||
|
||||
def test_camel(self):
|
||||
self.assertEqual("", from_camel(""))
|
||||
|
||||
self.assertEqual("Hello world", str_replace("Hello World", 6, 'w'))
|
||||
self.assertEqual("Hello *****", str_fill("Hello World", (6, 11), '*'))
|
||||
|
||||
self.assertTrue("This is camel", from_camel("ThisIsCamel"))
|
||||
|
||||
self.assertEqual('camel case', from_camel('camelCase'))
|
||||
self.assertEqual('A case', from_camel('ACase'))
|
||||
self.assertEqual('MiXedCaSe is not camel case', from_camel('MiXedCaSe is not camelCase'))
|
||||
|
||||
self.assertEqual("This is camel cased title", from_camel("ThisIsCamelCasedTitle"))
|
||||
self.assertEqual("This is camel CASED title", from_camel("ThisIsCamelCASEDTitle"))
|
||||
|
||||
self.assertEqual("These are camel CASED title", from_camel("TheseAreCamelCASEDTitle"))
|
||||
|
||||
self.assertEqual("Give a camel case string", from_camel("GiveACamelCaseString"))
|
||||
|
||||
self.assertEqual("Death TO camel case", from_camel("DeathTOCamelCase"))
|
||||
self.assertEqual("But i like java too:)", from_camel("ButILikeJavaToo:)"))
|
||||
|
||||
self.assertEqual("Beatdown french DVD rip.mkv", from_camel("BeatdownFrenchDVDRip.mkv"))
|
||||
self.assertEqual("DO NOTHING ON UPPER CASE", from_camel("DO NOTHING ON UPPER CASE"))
|
||||
|
||||
self.assertFalse(is_camel("this_is_not_camel"))
|
||||
self.assertTrue(is_camel("ThisIsCamel"))
|
||||
|
||||
self.assertEqual("Dark.City.(1998).DC.BDRIP.720p.DTS.X264-CHD.mkv", from_camel("Dark.City.(1998).DC.BDRIP.720p.DTS.X264-CHD.mkv"))
|
||||
self.assertFalse(is_camel("Dark.City.(1998).DC.BDRIP.720p.DTS.X264-CHD.mkv"))
|
||||
|
||||
self.assertEqual("A2LiNE", from_camel("A2LiNE"))
|
||||
|
||||
def test_date(self):
|
||||
self.assertEqual(search_year(' in the year 2000... '), (2000, (13, 17)))
|
||||
self.assertEqual(search_year(' they arrived in 1492. '), (None, None))
|
||||
|
||||
today = date.today()
|
||||
today_year_2 = int(str(today.year)[2:])
|
||||
|
||||
future = today + timedelta(days=1000)
|
||||
future_year_2 = int(str(future.year)[2:])
|
||||
|
||||
past = today - timedelta(days=10000)
|
||||
past_year_2 = int(str(past.year)[2:])
|
||||
|
||||
self.assertEqual(search_date(' Something before 2002-04-22 '), (date(2002, 4, 22), (18, 28)))
|
||||
self.assertEqual(search_date(' 2002-04-22 Something after '), (date(2002, 4, 22), (1, 11)))
|
||||
|
||||
self.assertEqual(search_date(' This happened on 2002-04-22. '), (date(2002, 4, 22), (18, 28)))
|
||||
self.assertEqual(search_date(' This happened on 22-04-2002. '), (date(2002, 4, 22), (18, 28)))
|
||||
|
||||
self.assertEqual(search_date(' This happened on 13-04-%s. ' % (today_year_2,)), (date(today.year, 4, 13), (18, 26)))
|
||||
self.assertEqual(search_date(' This happened on 22-04-%s. ' % (future_year_2,)), (date(future.year, 4, 22), (18, 26)))
|
||||
self.assertEqual(search_date(' This happened on 20-04-%s. ' % (past_year_2)), (date(past.year, 4, 20), (18, 26)))
|
||||
|
||||
self.assertEqual(search_date(' This happened on 04-13-%s. ' % (today_year_2,)), (date(today.year, 4, 13), (18, 26)))
|
||||
self.assertEqual(search_date(' This happened on 04-22-%s. ' % (future_year_2,)), (date(future.year, 4, 22), (18, 26)))
|
||||
self.assertEqual(search_date(' This happened on 04-20-%s. ' % (past_year_2)), (date(past.year, 4, 20), (18, 26)))
|
||||
|
||||
self.assertEqual(search_date(' This happened on 35-12-%s. ' % (today_year_2,)), (None, None))
|
||||
self.assertEqual(search_date(' This happened on 37-18-%s. ' % (future_year_2,)), (None, None))
|
||||
self.assertEqual(search_date(' This happened on 44-42-%s. ' % (past_year_2)), (None, None))
|
||||
|
||||
self.assertEqual(search_date(' This happened on %s. ' % (today, )), (today, (18, 28)))
|
||||
self.assertEqual(search_date(' This happened on %s. ' % (future, )), (future, (18, 28)))
|
||||
self.assertEqual(search_date(' This happened on %s. ' % (past, )), (past, (18, 28)))
|
||||
|
||||
self.assertEqual(search_date(' released date: 04-03-1901? '), (None, None))
|
||||
|
||||
self.assertEqual(search_date(' There\'s no date in here. '), (None, None))
|
||||
|
||||
|
||||
suite = allTests(TestUtils)
|
||||
|
||||
if __name__ == '__main__':
|
||||
TextTestRunner(verbosity=2).run(suite)
|
|
@ -1,25 +1,24 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
# Smewt - A smart collection manager
|
||||
# Copyright (c) 2008-2012 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# Smewt is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# Smewt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit import s
|
||||
from guessit.patterns import sep
|
||||
import functools
|
||||
|
@ -28,7 +27,6 @@ import re
|
|||
|
||||
# string-related functions
|
||||
|
||||
|
||||
def normalize_unicode(s):
|
||||
return unicodedata.normalize('NFC', s)
|
||||
|
||||
|
@ -45,36 +43,19 @@ def strip_brackets(s):
|
|||
return s
|
||||
|
||||
|
||||
_dotted_rexp = re.compile(r'(?:\W|^)(([A-Za-z]\.){2,}[A-Za-z]\.?)')
|
||||
|
||||
|
||||
def clean_string(st):
|
||||
for c in sep:
|
||||
# do not remove certain chars
|
||||
if c in ['-', ',']:
|
||||
continue
|
||||
|
||||
if c == '.':
|
||||
# we should not remove the dots for acronyms and such
|
||||
dotted = _dotted_rexp.search(st)
|
||||
if dotted:
|
||||
s = dotted.group(1)
|
||||
exclude_begin, exclude_end = dotted.span(1)
|
||||
|
||||
st = (st[:exclude_begin].replace(c, ' ') +
|
||||
st[exclude_begin:exclude_end] +
|
||||
st[exclude_end:].replace(c, ' '))
|
||||
continue
|
||||
|
||||
st = st.replace(c, ' ')
|
||||
|
||||
parts = st.split()
|
||||
result = ' '.join(p for p in parts if p != '')
|
||||
|
||||
# now also remove dashes on the outer part of the string
|
||||
while result and result[0] in '-':
|
||||
while result and result[0] in sep:
|
||||
result = result[1:]
|
||||
while result and result[-1] in '-':
|
||||
while result and result[-1] in sep:
|
||||
result = result[:-1]
|
||||
|
||||
return result
|
||||
|
@ -82,23 +63,21 @@ def clean_string(st):
|
|||
|
||||
_words_rexp = re.compile('\w+', re.UNICODE)
|
||||
|
||||
|
||||
def find_words(s):
|
||||
return _words_rexp.findall(s.replace('_', ' '))
|
||||
|
||||
|
||||
def reorder_title(title, articles=('the',), separators=(',', ', ')):
|
||||
def reorder_title(title):
|
||||
ltitle = title.lower()
|
||||
for article in articles:
|
||||
for separator in separators:
|
||||
suffix = separator + article
|
||||
if ltitle[-len(suffix):] == suffix:
|
||||
return title[-len(suffix) + len(separator):] + ' ' + title[:-len(suffix)]
|
||||
if ltitle[-4:] == ',the':
|
||||
return title[-3:] + ' ' + title[:-4]
|
||||
if ltitle[-5:] == ', the':
|
||||
return title[-3:] + ' ' + title[:-5]
|
||||
return title
|
||||
|
||||
|
||||
def str_replace(string, pos, c):
|
||||
return string[:pos] + c + string[pos + 1:]
|
||||
return string[:pos] + c + string[pos+1:]
|
||||
|
||||
|
||||
def str_fill(string, region, c):
|
||||
|
@ -106,6 +85,7 @@ def str_fill(string, region, c):
|
|||
return string[:start] + c * (end - start) + string[end:]
|
||||
|
||||
|
||||
|
||||
def levenshtein(a, b):
|
||||
if not a:
|
||||
return len(b)
|
||||
|
@ -115,25 +95,25 @@ def levenshtein(a, b):
|
|||
m = len(a)
|
||||
n = len(b)
|
||||
d = []
|
||||
for i in range(m + 1):
|
||||
d.append([0] * (n + 1))
|
||||
for i in range(m+1):
|
||||
d.append([0] * (n+1))
|
||||
|
||||
for i in range(m + 1):
|
||||
for i in range(m+1):
|
||||
d[i][0] = i
|
||||
|
||||
for j in range(n + 1):
|
||||
for j in range(n+1):
|
||||
d[0][j] = j
|
||||
|
||||
for i in range(1, m + 1):
|
||||
for j in range(1, n + 1):
|
||||
if a[i - 1] == b[j - 1]:
|
||||
for i in range(1, m+1):
|
||||
for j in range(1, n+1):
|
||||
if a[i-1] == b[j-1]:
|
||||
cost = 0
|
||||
else:
|
||||
cost = 1
|
||||
|
||||
d[i][j] = min(d[i - 1][j] + 1, # deletion
|
||||
d[i][j - 1] + 1, # insertion
|
||||
d[i - 1][j - 1] + cost # substitution
|
||||
d[i][j] = min(d[i-1][j] + 1, # deletion
|
||||
d[i][j-1] + 1, # insertion
|
||||
d[i-1][j-1] + cost # substitution
|
||||
)
|
||||
|
||||
return d[m][n]
|
||||
|
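As a side note (not part of this commit), the levenshtein helper in the hunk above computes a plain edit distance, so it can be exercised directly; a minimal sketch assuming the guessit.textutils import path shown in this diff:

from guessit.textutils import levenshtein

# Number of single-character insertions, deletions and substitutions
# needed to turn one string into the other.
assert levenshtein("kitten", "sitting") == 3
assert levenshtein("abcdef", "abcdef") == 0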
@ -160,7 +140,7 @@ def find_first_level_groups_span(string, enclosing):
|
|||
[(2, 5), (7, 10)]
|
||||
"""
|
||||
opening, closing = enclosing
|
||||
depth = [] # depth is a stack of indices where we opened a group
|
||||
depth = [] # depth is a stack of indices where we opened a group
|
||||
result = []
|
||||
for i, c, in enumerate(string):
|
||||
if c == opening:
|
||||
|
@ -171,7 +151,7 @@ def find_first_level_groups_span(string, enclosing):
|
|||
end = i
|
||||
if not depth:
|
||||
# we emptied our stack, so we have a 1st level group
|
||||
result.append((start, end + 1))
|
||||
result.append((start, end+1))
|
||||
except IndexError:
|
||||
# we closed a group which was not opened before
|
||||
pass
|
||||
|
@ -192,7 +172,7 @@ def split_on_groups(string, groups):
|
|||
|
||||
"""
|
||||
if not groups:
|
||||
return [string]
|
||||
return [ string ]
|
||||
|
||||
boundaries = sorted(set(functools.reduce(lambda l, x: l + list(x), groups, [])))
|
||||
if boundaries[0] != 0:
|
||||
|
@ -200,10 +180,10 @@ def split_on_groups(string, groups):
|
|||
if boundaries[-1] != len(string):
|
||||
boundaries.append(len(string))
|
||||
|
||||
groups = [string[start:end] for start, end in zip(boundaries[:-1],
|
||||
boundaries[1:])]
|
||||
groups = [ string[start:end] for start, end in zip(boundaries[:-1],
|
||||
boundaries[1:]) ]
|
||||
|
||||
return [g for g in groups if g] # return only non-empty groups
|
||||
return [ g for g in groups if g ] # return only non-empty groups
|
||||
|
||||
|
||||
def find_first_level_groups(string, enclosing, blank_sep=None):
|
||||
|
@ -239,114 +219,6 @@ def find_first_level_groups(string, enclosing, blank_sep=None):
|
|||
if blank_sep:
|
||||
for start, end in groups:
|
||||
string = str_replace(string, start, blank_sep)
|
||||
string = str_replace(string, end - 1, blank_sep)
|
||||
string = str_replace(string, end-1, blank_sep)
|
||||
|
||||
return split_on_groups(string, groups)
|
||||
|
||||
|
||||
_camel_word2_set = set(('is', 'to',))
|
||||
_camel_word3_set = set(('the',))
|
||||
|
||||
|
||||
def _camel_split_and_lower(string, i):
|
||||
"""Retrieves a tuple (need_split, need_lower)
|
||||
|
||||
need_split is True if this char is a first letter in a camelCasedString.
|
||||
need_lower is True if this char should be lowercased.
|
||||
"""
|
||||
|
||||
def islower(c):
|
||||
return c.isalpha() and not c.isupper()
|
||||
|
||||
previous_char2 = string[i - 2] if i > 1 else None
|
||||
previous_char = string[i - 1] if i > 0 else None
|
||||
char = string[i]
|
||||
next_char = string[i + 1] if i + 1 < len(string) else None
|
||||
next_char2 = string[i + 2] if i + 2 < len(string) else None
|
||||
|
||||
char_upper = char.isupper()
|
||||
char_lower = islower(char)
|
||||
|
||||
# previous_char2_lower = islower(previous_char2) if previous_char2 else False
|
||||
previous_char2_upper = previous_char2.isupper() if previous_char2 else False
|
||||
|
||||
previous_char_lower = islower(previous_char) if previous_char else False
|
||||
previous_char_upper = previous_char.isupper() if previous_char else False
|
||||
|
||||
next_char_upper = next_char.isupper() if next_char else False
|
||||
next_char_lower = islower(next_char) if next_char else False
|
||||
|
||||
next_char2_upper = next_char2.isupper() if next_char2 else False
|
||||
# next_char2_lower = islower(next_char2) if next_char2 else False
|
||||
|
||||
mixedcase_word = (previous_char_upper and char_lower and next_char_upper) or \
|
||||
(previous_char_lower and char_upper and next_char_lower and next_char2_upper) or \
|
||||
(previous_char2_upper and previous_char_lower and char_upper)
|
||||
if mixedcase_word:
|
||||
word2 = (char + next_char).lower() if next_char else None
|
||||
word3 = (char + next_char + next_char2).lower() if next_char and next_char2 else None
|
||||
word2b = (previous_char2 + previous_char).lower() if previous_char2 and previous_char else None
|
||||
if word2 in _camel_word2_set or word2b in _camel_word2_set or word3 in _camel_word3_set:
|
||||
mixedcase_word = False
|
||||
|
||||
uppercase_word = previous_char_upper and char_upper and next_char_upper or (char_upper and next_char_upper and next_char2_upper)
|
||||
|
||||
need_split = char_upper and previous_char_lower and not mixedcase_word
|
||||
|
||||
if not need_split:
|
||||
previous_char_upper = string[i - 1].isupper() if i > 0 else False
|
||||
next_char_lower = (string[i + 1].isalpha() and not string[i + 1].isupper()) if i + 1 < len(string) else False
|
||||
need_split = char_upper and previous_char_upper and next_char_lower
|
||||
uppercase_word = previous_char_upper and not next_char_lower
|
||||
|
||||
need_lower = not uppercase_word and not mixedcase_word and need_split
|
||||
|
||||
return (need_split, need_lower)
|
||||
|
||||
|
||||
def is_camel(string):
|
||||
"""
|
||||
>>> is_camel('dogEATDog')
|
||||
True
|
||||
>>> is_camel('DeathToCamelCase')
|
||||
True
|
||||
>>> is_camel('death_to_camel_case')
|
||||
False
|
||||
>>> is_camel('TheBest')
|
||||
True
|
||||
>>> is_camel('The Best')
|
||||
False
|
||||
"""
|
||||
for i in range(0, len(string)):
|
||||
need_split, _ = _camel_split_and_lower(string, i)
|
||||
if need_split:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def from_camel(string):
|
||||
"""
|
||||
>>> from_camel('dogEATDog') == 'dog EAT dog'
|
||||
True
|
||||
>>> from_camel('DeathToCamelCase') == 'Death to camel case'
|
||||
True
|
||||
>>> from_camel('TheBest') == 'The best'
|
||||
True
|
||||
>>> from_camel('MiXedCaSe is not camelCase') == 'MiXedCaSe is not camel case'
|
||||
True
|
||||
"""
|
||||
if not string:
|
||||
return string
|
||||
pieces = []
|
||||
|
||||
for i in range(0, len(string)):
|
||||
char = string[i]
|
||||
need_split, need_lower = _camel_split_and_lower(string, i)
|
||||
if need_split:
|
||||
pieces.append(' ')
|
||||
|
||||
if need_lower:
|
||||
pieces.append(char.lower())
|
||||
else:
|
||||
pieces.append(char)
|
||||
return ''.join(pieces)
|
||||
|
|
|
@ -1,341 +0,0 @@
|
|||
# Version 2013112900, Last Updated Fri Nov 29 07:07:01 2013 UTC
|
||||
AC
|
||||
AD
|
||||
AE
|
||||
AERO
|
||||
AF
|
||||
AG
|
||||
AI
|
||||
AL
|
||||
AM
|
||||
AN
|
||||
AO
|
||||
AQ
|
||||
AR
|
||||
ARPA
|
||||
AS
|
||||
ASIA
|
||||
AT
|
||||
AU
|
||||
AW
|
||||
AX
|
||||
AZ
|
||||
BA
|
||||
BB
|
||||
BD
|
||||
BE
|
||||
BF
|
||||
BG
|
||||
BH
|
||||
BI
|
||||
BIKE
|
||||
BIZ
|
||||
BJ
|
||||
BM
|
||||
BN
|
||||
BO
|
||||
BR
|
||||
BS
|
||||
BT
|
||||
BV
|
||||
BW
|
||||
BY
|
||||
BZ
|
||||
CA
|
||||
CAMERA
|
||||
CAT
|
||||
CC
|
||||
CD
|
||||
CF
|
||||
CG
|
||||
CH
|
||||
CI
|
||||
CK
|
||||
CL
|
||||
CLOTHING
|
||||
CM
|
||||
CN
|
||||
CO
|
||||
COM
|
||||
CONSTRUCTION
|
||||
CONTRACTORS
|
||||
COOP
|
||||
CR
|
||||
CU
|
||||
CV
|
||||
CW
|
||||
CX
|
||||
CY
|
||||
CZ
|
||||
DE
|
||||
DIAMONDS
|
||||
DIRECTORY
|
||||
DJ
|
||||
DK
|
||||
DM
|
||||
DO
|
||||
DZ
|
||||
EC
|
||||
EDU
|
||||
EE
|
||||
EG
|
||||
ENTERPRISES
|
||||
EQUIPMENT
|
||||
ER
|
||||
ES
|
||||
ESTATE
|
||||
ET
|
||||
EU
|
||||
FI
|
||||
FJ
|
||||
FK
|
||||
FM
|
||||
FO
|
||||
FR
|
||||
GA
|
||||
GALLERY
|
||||
GB
|
||||
GD
|
||||
GE
|
||||
GF
|
||||
GG
|
||||
GH
|
||||
GI
|
||||
GL
|
||||
GM
|
||||
GN
|
||||
GOV
|
||||
GP
|
||||
GQ
|
||||
GR
|
||||
GRAPHICS
|
||||
GS
|
||||
GT
|
||||
GU
|
||||
GURU
|
||||
GW
|
||||
GY
|
||||
HK
|
||||
HM
|
||||
HN
|
||||
HOLDINGS
|
||||
HR
|
||||
HT
|
||||
HU
|
||||
ID
|
||||
IE
|
||||
IL
|
||||
IM
|
||||
IN
|
||||
INFO
|
||||
INT
|
||||
IO
|
||||
IQ
|
||||
IR
|
||||
IS
|
||||
IT
|
||||
JE
|
||||
JM
|
||||
JO
|
||||
JOBS
|
||||
JP
|
||||
KE
|
||||
KG
|
||||
KH
|
||||
KI
|
||||
KITCHEN
|
||||
KM
|
||||
KN
|
||||
KP
|
||||
KR
|
||||
KW
|
||||
KY
|
||||
KZ
|
||||
LA
|
||||
LAND
|
||||
LB
|
||||
LC
|
||||
LI
|
||||
LIGHTING
|
||||
LK
|
||||
LR
|
||||
LS
|
||||
LT
|
||||
LU
|
||||
LV
|
||||
LY
|
||||
MA
|
||||
MC
|
||||
MD
|
||||
ME
|
||||
MG
|
||||
MH
|
||||
MIL
|
||||
MK
|
||||
ML
|
||||
MM
|
||||
MN
|
||||
MO
|
||||
MOBI
|
||||
MP
|
||||
MQ
|
||||
MR
|
||||
MS
|
||||
MT
|
||||
MU
|
||||
MUSEUM
|
||||
MV
|
||||
MW
|
||||
MX
|
||||
MY
|
||||
MZ
|
||||
NA
|
||||
NAME
|
||||
NC
|
||||
NE
|
||||
NET
|
||||
NF
|
||||
NG
|
||||
NI
|
||||
NL
|
||||
NO
|
||||
NP
|
||||
NR
|
||||
NU
|
||||
NZ
|
||||
OM
|
||||
ORG
|
||||
PA
|
||||
PE
|
||||
PF
|
||||
PG
|
||||
PH
|
||||
PHOTOGRAPHY
|
||||
PK
|
||||
PL
|
||||
PLUMBING
|
||||
PM
|
||||
PN
|
||||
POST
|
||||
PR
|
||||
PRO
|
||||
PS
|
||||
PT
|
||||
PW
|
||||
PY
|
||||
QA
|
||||
RE
|
||||
RO
|
||||
RS
|
||||
RU
|
||||
RW
|
||||
SA
|
||||
SB
|
||||
SC
|
||||
SD
|
||||
SE
|
||||
SEXY
|
||||
SG
|
||||
SH
|
||||
SI
|
||||
SINGLES
|
||||
SJ
|
||||
SK
|
||||
SL
|
||||
SM
|
||||
SN
|
||||
SO
|
||||
SR
|
||||
ST
|
||||
SU
|
||||
SV
|
||||
SX
|
||||
SY
|
||||
SZ
|
||||
TATTOO
|
||||
TC
|
||||
TD
|
||||
TECHNOLOGY
|
||||
TEL
|
||||
TF
|
||||
TG
|
||||
TH
|
||||
TIPS
|
||||
TJ
|
||||
TK
|
||||
TL
|
||||
TM
|
||||
TN
|
||||
TO
|
||||
TODAY
|
||||
TP
|
||||
TR
|
||||
TRAVEL
|
||||
TT
|
||||
TV
|
||||
TW
|
||||
TZ
|
||||
UA
|
||||
UG
|
||||
UK
|
||||
US
|
||||
UY
|
||||
UZ
|
||||
VA
|
||||
VC
|
||||
VE
|
||||
VENTURES
|
||||
VG
|
||||
VI
|
||||
VN
|
||||
VOYAGE
|
||||
VU
|
||||
WF
|
||||
WS
|
||||
XN--3E0B707E
|
||||
XN--45BRJ9C
|
||||
XN--80AO21A
|
||||
XN--80ASEHDB
|
||||
XN--80ASWG
|
||||
XN--90A3AC
|
||||
XN--CLCHC0EA0B2G2A9GCD
|
||||
XN--FIQS8S
|
||||
XN--FIQZ9S
|
||||
XN--FPCRJ9C3D
|
||||
XN--FZC2C9E2C
|
||||
XN--GECRJ9C
|
||||
XN--H2BRJ9C
|
||||
XN--J1AMH
|
||||
XN--J6W193G
|
||||
XN--KPRW13D
|
||||
XN--KPRY57D
|
||||
XN--L1ACC
|
||||
XN--LGBBAT1AD8J
|
||||
XN--MGB9AWBF
|
||||
XN--MGBA3A4F16A
|
||||
XN--MGBAAM7A8H
|
||||
XN--MGBAYH7GPA
|
||||
XN--MGBBH1A71E
|
||||
XN--MGBC0A9AZCG
|
||||
XN--MGBERP4A5D4AR
|
||||
XN--MGBX4CD0AB
|
||||
XN--NGBC5AZD
|
||||
XN--O3CW4H
|
||||
XN--OGBPF8FL
|
||||
XN--P1AI
|
||||
XN--PGBS0DH
|
||||
XN--Q9JYB4C
|
||||
XN--S9BRJ9C
|
||||
XN--UNUP4Y
|
||||
XN--WGBH1C
|
||||
XN--WGBL6A
|
||||
XN--XKC2AL3HYE2A
|
||||
XN--XKC2DL3A5EE0H
|
||||
XN--YFRO4I67O
|
||||
XN--YGBI2AMMX
|
||||
XXX
|
||||
YE
|
||||
YT
|
||||
ZA
|
||||
ZM
|
||||
ZW
|
|
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@ -18,13 +18,92 @@
|
|||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
from __future__ import unicode_literals
|
||||
from guessit import base_text_type, Guess
|
||||
from guessit.patterns import canonical_form
|
||||
from guessit.textutils import clean_string
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TransformerException(Exception):
|
||||
def __init__(self, transformer, message):
|
||||
def found_property(node, name, confidence):
|
||||
node.guess = Guess({name: node.clean_value}, confidence=confidence, raw=node.value)
|
||||
log.debug('Found with confidence %.2f: %s' % (confidence, node.guess))
|
||||
|
||||
# Call the base class constructor with the parameters it needs
|
||||
Exception.__init__(self, message)
|
||||
|
||||
self.transformer = transformer
|
||||
def format_guess(guess):
|
||||
"""Format all the found values to their natural type.
|
||||
For instance, a year would be stored as an int value, etc...
|
||||
|
||||
Note that this modifies the dictionary given as input.
|
||||
"""
|
||||
for prop, value in guess.items():
|
||||
if prop in ('season', 'episodeNumber', 'year', 'cdNumber',
|
||||
'cdNumberTotal', 'bonusNumber', 'filmNumber'):
|
||||
guess[prop] = int(guess[prop])
|
||||
elif isinstance(value, base_text_type):
|
||||
if prop in ('edition',):
|
||||
value = clean_string(value)
|
||||
guess[prop] = canonical_form(value).replace('\\', '')
|
||||
|
||||
return guess
|
||||
|
||||
|
||||
def find_and_split_node(node, strategy, logger):
|
||||
string = ' %s ' % node.value # add sentinels
|
||||
for matcher, confidence, args, kwargs in strategy:
|
||||
all_args = [string]
|
||||
if getattr(matcher, 'use_node', False):
|
||||
all_args.append(node)
|
||||
if args:
|
||||
all_args.append(args)
|
||||
|
||||
if kwargs:
|
||||
result, span = matcher(*all_args, **kwargs)
|
||||
else:
|
||||
result, span = matcher(*all_args)
|
||||
|
||||
if result:
|
||||
# readjust span to compensate for sentinels
|
||||
span = (span[0] - 1, span[1] - 1)
|
||||
|
||||
if isinstance(result, Guess):
|
||||
if confidence is None:
|
||||
confidence = result.confidence(list(result.keys())[0])
|
||||
else:
|
||||
if confidence is None:
|
||||
confidence = 1.0
|
||||
|
||||
guess = format_guess(Guess(result, confidence=confidence, raw=string[span[0] + 1:span[1] + 1]))
|
||||
msg = 'Found with confidence %.2f: %s' % (confidence, guess)
|
||||
(logger or log).debug(msg)
|
||||
|
||||
node.partition(span)
|
||||
absolute_span = (span[0] + node.offset, span[1] + node.offset)
|
||||
for child in node.children:
|
||||
if child.span == absolute_span:
|
||||
child.guess = guess
|
||||
else:
|
||||
find_and_split_node(child, strategy, logger)
|
||||
return
|
||||
|
||||
|
||||
class SingleNodeGuesser(object):
|
||||
def __init__(self, guess_func, confidence, logger, *args, **kwargs):
|
||||
self.guess_func = guess_func
|
||||
self.confidence = confidence
|
||||
self.logger = logger
|
||||
self.args = args
|
||||
self.kwargs = kwargs
|
||||
|
||||
def process(self, mtree):
|
||||
# strategy is a list of pairs (guesser, confidence)
|
||||
# - if the guesser returns a guessit.Guess and confidence is specified,
|
||||
# it will override it, otherwise it will leave the guess confidence
|
||||
# - if the guesser returns a simple dict as a guess and confidence is
|
||||
# specified, it will use it, or 1.0 otherwise
|
||||
strategy = [ (self.guess_func, self.confidence, self.args, self.kwargs) ]
|
||||
|
||||
for node in mtree.unidentified_leaves():
|
||||
find_and_split_node(node, strategy, self.logger)
|
||||
|
|
|
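The strategy tuples above assume a simple contract for guess functions: given a string (padded with sentinel spaces), return either a dict/Guess plus the span it was found at, or (None, None). A minimal standalone sketch of that contract follows; the year matcher is hypothetical and not part of this commit.

import re

def guess_year(string):
    # toy matcher: find a plausible 4-digit year and report its span
    match = re.search(r'(19|20)\d{2}', string)
    if match:
        return {'year': match.group()}, match.span()
    return None, None

print(guess_year(' Dark City 1998 DC BDRip 720p '))
# -> ({'year': '1998'}, (11, 15))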
@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by

@@ -18,50 +18,44 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals
from __future__ import unicode_literals
from guessit.transfo import found_property
import logging

from guessit.plugins.transformers import Transformer
from guessit.matcher import found_property
log = logging.getLogger(__name__)


class GuessBonusFeatures(Transformer):
def __init__(self):
Transformer.__init__(self, -150)
def process(mtree):
def previous_group(g):
for leaf in mtree.unidentified_leaves()[::-1]:
if leaf.node_idx < g.node_idx:
return leaf

def supported_properties(self):
return ['bonusNumber', 'bonusTitle', 'filmNumber', 'filmSeries', 'title', 'series']
def next_group(g):
for leaf in mtree.unidentified_leaves():
if leaf.node_idx > g.node_idx:
return leaf

def process(self, mtree, options=None):
def previous_group(g):
for leaf in mtree.unidentified_leaves()[::-1]:
if leaf.node_idx < g.node_idx:
return leaf
def same_group(g1, g2):
return g1.node_idx[:2] == g2.node_idx[:2]

def next_group(g):
for leaf in mtree.unidentified_leaves():
if leaf.node_idx > g.node_idx:
return leaf
bonus = [ node for node in mtree.leaves() if 'bonusNumber' in node.guess ]
if bonus:
bonusTitle = next_group(bonus[0])
if same_group(bonusTitle, bonus[0]):
found_property(bonusTitle, 'bonusTitle', 0.8)

def same_group(g1, g2):
return g1.node_idx[:2] == g2.node_idx[:2]
filmNumber = [ node for node in mtree.leaves()
if 'filmNumber' in node.guess ]
if filmNumber:
filmSeries = previous_group(filmNumber[0])
found_property(filmSeries, 'filmSeries', 0.9)

bonus = [node for node in mtree.leaves() if 'bonusNumber' in node.guess]
if bonus:
bonusTitle = next_group(bonus[0])
if bonusTitle and same_group(bonusTitle, bonus[0]):
found_property(bonusTitle, 'bonusTitle', confidence=0.8)
title = next_group(filmNumber[0])
found_property(title, 'title', 0.9)

filmNumber = [node for node in mtree.leaves()
if 'filmNumber' in node.guess]
if filmNumber:
filmSeries = previous_group(filmNumber[0])
found_property(filmSeries, 'filmSeries', confidence=0.9)

title = next_group(filmNumber[0])
found_property(title, 'title', confidence=0.9)

season = [node for node in mtree.leaves() if 'season' in node.guess]
if season and 'bonusNumber' in mtree.info:
series = previous_group(season[0])
if same_group(series, season[0]):
found_property(series, 'series', confidence=0.9)
season = [ node for node in mtree.leaves() if 'season' in node.guess ]
if season and 'bonusNumber' in mtree.info:
series = previous_group(season[0])
if same_group(series, season[0]):
found_property(series, 'series', 0.9)
@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by

@@ -18,52 +18,31 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.plugins.transformers import Transformer
from __future__ import unicode_literals
from guessit.country import Country
from guessit import Guess
import logging

log = logging.getLogger(__name__)

class GuessCountry(Transformer):
def __init__(self):
Transformer.__init__(self, -170)
# list of common words which could be interpreted as countries, but which
# are far too common to be able to say they represent a country
self.country_common_words = frozenset(['bt', 'bb'])
# list of common words which could be interpreted as countries, but which
# are far too common to be able to say they represent a country
country_common_words = frozenset([ 'bt', 'bb' ])

def supported_properties(self):
return ['country']
def process(mtree):
for node in mtree.unidentified_leaves():
if len(node.node_idx) == 2:
c = node.value[1:-1].lower()
if c in country_common_words:
continue

def should_process(self, mtree, options=None):
options = options or {}
return 'nocountry' not in options.keys()
# only keep explicit groups (enclosed in parentheses/brackets)
if node.value[0] + node.value[-1] not in ['()', '[]', '{}']:
continue

def process(self, mtree, options=None):
for node in mtree.unidentified_leaves():
if len(node.node_idx) == 2:
c = node.value[1:-1].lower()
if c in self.country_common_words:
continue
try:
country = Country(c, strict=True)
except ValueError:
continue

# only keep explicit groups (enclosed in parentheses/brackets)
if not node.is_explicit():
continue

try:
country = Country(c, strict=True)
except ValueError:
continue

node.guess = Guess(country=country, confidence=1.0, input=node.value, span=node.span)

def post_process(self, mtree, options=None, *args, **kwargs):
# if country is in the guessed properties, make it part of the series name
series_leaves = mtree.leaves_containing('series')
country_leaves = mtree.leaves_containing('country')

if series_leaves and country_leaves:
country_leaf = country_leaves[0]
for serie_leaf in series_leaves:
serie_leaf.guess['series'] += ' (%s)' % country_leaf.guess['country'].alpha2.upper()
#result['series'] += ' (%s)' % result['country'].alpha2.upper()
node.guess = Guess(country=country, confidence=1.0, raw=c)
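As an illustrative sketch (not part of this commit), the country pass boils down to: keep only explicit groups such as "(US)" or "[UK]", skip codes that are usually ordinary words, and look the rest up in a country table. The KNOWN_COUNTRIES set below is a hypothetical stand-in for guessit's Country data.

KNOWN_COUNTRIES = {'us', 'uk', 'fr', 'bt', 'bb'}
COMMON_WORDS = frozenset(['bt', 'bb'])

def guess_country_token(token):
    # only consider explicit groups, i.e. values enclosed in (), [] or {}
    if len(token) < 3 or token[0] + token[-1] not in ('()', '[]', '{}'):
        return None
    code = token[1:-1].lower()
    if code in COMMON_WORDS or code not in KNOWN_COUNTRIES:
        return None
    return code

print(guess_country_token('(UK)'))  # -> 'uk'
print(guess_country_token('[bt]'))  # -> None (too common to be a country)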
@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by

@@ -18,26 +18,21 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.plugins.transformers import Transformer
from guessit.matcher import GuessFinder
from __future__ import unicode_literals
from guessit.transfo import SingleNodeGuesser
from guessit.date import search_date
import logging

log = logging.getLogger(__name__)


class GuessDate(Transformer):
def __init__(self):
Transformer.__init__(self, 50)
def guess_date(string):
date, span = search_date(string)
if date:
return { 'date': date }, span
else:
return None, None

def supported_properties(self):
return ['date']

def guess_date(self, string, node=None, options=None):
date, span = search_date(string)
if date:
return {'date': date}, span
else:
return None, None

def process(self, mtree, options=None):
GuessFinder(self.guess_date, 1.0, self.log, options).process_nodes(mtree.unidentified_leaves())
def process(mtree):
SingleNodeGuesser(guess_date, 1.0, log).process(mtree)
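Both versions of guess_date above simply wrap a date search that reports the parsed value together with the span it occupied; (None, None) means no match. A self-contained sketch of that shape, using a hypothetical regex-based search rather than guessit's own search_date:

import re
from datetime import date

def search_date_sketch(string):
    match = re.search(r'(\d{4})[.-](\d{2})[.-](\d{2})', string)
    if match:
        year, month, day = (int(g) for g in match.groups())
        return date(year, month, day), match.span()
    return None, None

def guess_date(string):
    found, span = search_date_sketch(string)
    if found:
        return {'date': found}, span
    return None, None

print(guess_date('Show.2009.11.05.HDTV'))
# -> ({'date': datetime.date(2009, 11, 5)}, (5, 15))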
@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by

@@ -18,145 +18,129 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals
from __future__ import unicode_literals
from guessit.transfo import found_property
from guessit.patterns import non_episode_title, unlikely_series
import logging

from guessit.plugins.transformers import Transformer, get_transformer
from guessit.textutils import reorder_title

from guessit.matcher import found_property
log = logging.getLogger(__name__)


class GuessEpisodeInfoFromPosition(Transformer):
def __init__(self):
Transformer.__init__(self, -200)
def match_from_epnum_position(mtree, node):
epnum_idx = node.node_idx

def supported_properties(self):
return ['title', 'series']
# a few helper functions to be able to filter using high-level semantics
def before_epnum_in_same_pathgroup():
return [ leaf for leaf in mtree.unidentified_leaves()
if (leaf.node_idx[0] == epnum_idx[0] and
leaf.node_idx[1:] < epnum_idx[1:]) ]

def match_from_epnum_position(self, mtree, node):
epnum_idx = node.node_idx
def after_epnum_in_same_pathgroup():
return [ leaf for leaf in mtree.unidentified_leaves()
if (leaf.node_idx[0] == epnum_idx[0] and
leaf.node_idx[1:] > epnum_idx[1:]) ]

# a few helper functions to be able to filter using high-level semantics
def before_epnum_in_same_pathgroup():
return [leaf for leaf in mtree.unidentified_leaves()
if (leaf.node_idx[0] == epnum_idx[0] and
leaf.node_idx[1:] < epnum_idx[1:])]
def after_epnum_in_same_explicitgroup():
return [ leaf for leaf in mtree.unidentified_leaves()
if (leaf.node_idx[:2] == epnum_idx[:2] and
leaf.node_idx[2:] > epnum_idx[2:]) ]

def after_epnum_in_same_pathgroup():
return [leaf for leaf in mtree.unidentified_leaves()
if (leaf.node_idx[0] == epnum_idx[0] and
leaf.node_idx[1:] > epnum_idx[1:])]
# epnumber is the first group and there are only 2 after it in same
# path group
# -> series title - episode title
title_candidates = [ n for n in after_epnum_in_same_pathgroup()
if n.clean_value.lower() not in non_episode_title ]
if ('title' not in mtree.info and # no title
before_epnum_in_same_pathgroup() == [] and # no groups before
len(title_candidates) == 2): # only 2 groups after

def after_epnum_in_same_explicitgroup():
return [leaf for leaf in mtree.unidentified_leaves()
if (leaf.node_idx[:2] == epnum_idx[:2] and
leaf.node_idx[2:] > epnum_idx[2:])]
found_property(title_candidates[0], 'series', confidence=0.4)
found_property(title_candidates[1], 'title', confidence=0.4)
return

# epnumber is the first group and there are only 2 after it in same
# path group
# -> series title - episode title
title_candidates = self._filter_candidates(after_epnum_in_same_pathgroup())
# if we have at least 1 valid group before the episodeNumber, then it's
# probably the series name
series_candidates = before_epnum_in_same_pathgroup()
if len(series_candidates) >= 1:
found_property(series_candidates[0], 'series', confidence=0.7)

if ('title' not in mtree.info and # no title
before_epnum_in_same_pathgroup() == [] and # no groups before
len(title_candidates) == 2): # only 2 groups after

found_property(title_candidates[0], 'series', confidence=0.4)
found_property(title_candidates[1], 'title', confidence=0.4)
return

# if we have at least 1 valid group before the episodeNumber, then it's
# probably the series name
series_candidates = before_epnum_in_same_pathgroup()
if len(series_candidates) >= 1:
found_property(series_candidates[0], 'series', confidence=0.7)

# only 1 group after (in the same path group) and it's probably the
# episode title
title_candidates = self._filter_candidates(after_epnum_in_same_pathgroup())
# only 1 group after (in the same path group) and it's probably the
# episode title
title_candidates = [ n for n in after_epnum_in_same_pathgroup()
if n.clean_value.lower() not in non_episode_title ]

if len(title_candidates) == 1:
found_property(title_candidates[0], 'title', confidence=0.5)
return
else:
# try in the same explicit group, with lower confidence
title_candidates = [ n for n in after_epnum_in_same_explicitgroup()
if n.clean_value.lower() not in non_episode_title
]
if len(title_candidates) == 1:
found_property(title_candidates[0], 'title', confidence=0.5)
found_property(title_candidates[0], 'title', confidence=0.4)
return
elif len(title_candidates) > 1:
found_property(title_candidates[0], 'title', confidence=0.3)
return
else:
# try in the same explicit group, with lower confidence
title_candidates = self._filter_candidates(after_epnum_in_same_explicitgroup())
if len(title_candidates) == 1:
found_property(title_candidates[0], 'title', confidence=0.4)
return
elif len(title_candidates) > 1:
found_property(title_candidates[0], 'title', confidence=0.3)
return

# get the one with the longest value
title_candidates = self._filter_candidates(after_epnum_in_same_pathgroup())
if title_candidates:
maxidx = -1
maxv = -1
for i, c in enumerate(title_candidates):
if len(c.clean_value) > maxv:
maxidx = i
maxv = len(c.clean_value)
found_property(title_candidates[maxidx], 'title', confidence=0.3)
# get the one with the longest value
title_candidates = [ n for n in after_epnum_in_same_pathgroup()
if n.clean_value.lower() not in non_episode_title ]
if title_candidates:
maxidx = -1
maxv = -1
for i, c in enumerate(title_candidates):
if len(c.clean_value) > maxv:
maxidx = i
maxv = len(c.clean_value)
found_property(title_candidates[maxidx], 'title', confidence=0.3)

def should_process(self, mtree, options=None):
options = options or {}
return not options.get('skip_title') and mtree.guess.get('type', '').startswith('episode')

def _filter_candidates(self, candidates):
episode_special_transformer = get_transformer('guess_episode_special')
if episode_special_transformer:
return [n for n in candidates if not episode_special_transformer.container.find_properties(n.value, n, re_match=True)]
else:
return candidates
def process(mtree):
eps = [node for node in mtree.leaves() if 'episodeNumber' in node.guess]
if eps:
match_from_epnum_position(mtree, eps[0])

def process(self, mtree, options=None):
"""
try to identify the remaining unknown groups by looking at their
position relative to other known elements
"""
eps = [node for node in mtree.leaves() if 'episodeNumber' in node.guess]
if eps:
self.match_from_epnum_position(mtree, eps[0])
else:
# if we don't have the episode number, but at least 2 groups in the
# basename, then it's probably series - eptitle
basename = mtree.node_at((-2,))
title_candidates = [ n for n in basename.unidentified_leaves()
if n.clean_value.lower() not in non_episode_title
]

else:
# if we don't have the episode number, but at least 2 groups in the
# basename, then it's probably series - eptitle
basename = mtree.node_at((-2,))
if len(title_candidates) >= 2:
found_property(title_candidates[0], 'series', 0.4)
found_property(title_candidates[1], 'title', 0.4)
elif len(title_candidates) == 1:
# but if there's only one candidate, it's probably the series name
found_property(title_candidates[0], 'series', 0.4)

title_candidates = self._filter_candidates(basename.unidentified_leaves())
# if we only have 1 remaining valid group in the folder containing the
# file, then it's likely that it is the series name
try:
series_candidates = mtree.node_at((-3,)).unidentified_leaves()
except ValueError:
series_candidates = []

if len(title_candidates) >= 2:
found_property(title_candidates[0], 'series', confidence=0.4)
found_property(title_candidates[1], 'title', confidence=0.4)
elif len(title_candidates) == 1:
# but if there's only one candidate, it's probably the series name
found_property(title_candidates[0], 'series', confidence=0.4)
if len(series_candidates) == 1:
found_property(series_candidates[0], 'series', 0.3)

# if we only have 1 remaining valid group in the folder containing the
# file, then it's likely that it is the series name
try:
series_candidates = mtree.node_at((-3,)).unidentified_leaves()
except ValueError:
series_candidates = []
# if there's a path group that only contains the season info, then the
# previous one is most likely the series title (ie: ../series/season X/..)
eps = [ node for node in mtree.nodes()
if 'season' in node.guess and 'episodeNumber' not in node.guess ]

if len(series_candidates) == 1:
found_property(series_candidates[0], 'series', confidence=0.3)
if eps:
previous = [ node for node in mtree.unidentified_leaves()
if node.node_idx[0] == eps[0].node_idx[0] - 1 ]
if len(previous) == 1:
found_property(previous[0], 'series', 0.5)

# if there's a path group that only contains the season info, then the
# previous one is most likely the series title (ie: ../series/season X/..)
eps = [node for node in mtree.nodes()
if 'season' in node.guess and 'episodeNumber' not in node.guess]

if eps:
previous = [node for node in mtree.unidentified_leaves()
if node.node_idx[0] == eps[0].node_idx[0] - 1]
if len(previous) == 1:
found_property(previous[0], 'series', confidence=0.5)

def post_process(self, mtree, options=None):
for node in mtree.nodes():
if 'series' not in node.guess:
continue

node.guess['series'] = reorder_title(node.guess['series'])
# reduce the confidence of unlikely series
for node in mtree.nodes():
if 'series' in node.guess:
if node.guess['series'].lower() in unlikely_series:
new_confidence = node.guess.confidence('series') * 0.5
node.guess.set_confidence('series', new_confidence)
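The positional rules above all reduce to comparing node_idx tuples against the index of the episode-number group. A toy sketch of that comparison, with hypothetical leaves represented as (node_idx, text) pairs:

leaves = [((1, 0), 'Show Name'), ((1, 2), 'Episode Title')]
epnum_idx = (1, 1)

before = [text for idx, text in leaves if idx[0] == epnum_idx[0] and idx[1:] < epnum_idx[1:]]
after = [text for idx, text in leaves if idx[0] == epnum_idx[0] and idx[1:] > epnum_idx[1:]]
print(before)  # -> ['Show Name']      (likely the series)
print(after)   # -> ['Episode Title']  (likely the episode title)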
@@ -1,62 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.plugins.transformers import Transformer
from guessit.matcher import found_guess
from guessit.containers import PropertiesContainer


class GuessEpisodeSpecial(Transformer):
def __init__(self):
Transformer.__init__(self, -205)
self.container = PropertiesContainer()
self.container.register_property('special', 'Special', 'Bonus', 'Omake', 'Ova', 'Oav', 'Pilot', 'Unaired')
self.container.register_property('special', 'Extras?', canonical_form='Extras')

def guess_special(self, string, node=None, options=None):
properties = self.container.find_properties(string, node, 'special', multiple=True)
guesses = self.container.as_guess(properties, multiple=True)
return guesses

def second_pass_options(self, mtree, options=None):
if not mtree.guess.get('type', '').startswith('episode'):
for unidentified_leaf in mtree.unidentified_leaves():
properties = self.container.find_properties(unidentified_leaf.value, unidentified_leaf, 'special')
guess = self.container.as_guess(properties)
if guess:
return {'type': 'episode'}
return None

def supported_properties(self):
return self.container.get_supported_properties()

def process(self, mtree, options=None):
if mtree.guess.get('type', '').startswith('episode') and (not mtree.info.get('episodeNumber') or mtree.info.get('season') == 0):
for title_leaf in mtree.leaves_containing('title'):
guesses = self.guess_special(title_leaf.value, title_leaf, options)
for guess in guesses:
found_guess(title_leaf, guess, update_guess=False)
for unidentified_leaf in mtree.unidentified_leaves():
guesses = self.guess_special(unidentified_leaf.value, unidentified_leaf, options)
for guess in guesses:
found_guess(unidentified_leaf, guess, update_guess=False)
return None
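The removed special-episode transformer tags groups whose text matches keywords such as 'Pilot' or 'OVA'. A plain keyword scan captures the idea; the word list below is a hypothetical subset, not guessit's actual container configuration.

import re

SPECIAL_WORDS = ('special', 'bonus', 'omake', 'ova', 'oav', 'pilot', 'unaired', 'extras')

def guess_special(text):
    # scan lower-cased words and keep the ones that mark a special episode
    words = re.findall(r'[a-z]+', text.lower())
    return [w for w in words if w in SPECIAL_WORDS]

print(guess_special('Show.Name.S00E01.Unaired.Pilot.720p'))  # -> ['unaired', 'pilot']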
@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by

@@ -18,63 +18,49 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals
from __future__ import unicode_literals
from guessit import Guess
from guessit.transfo import SingleNodeGuesser
from guessit.patterns import episode_rexps
import re
import logging

from guessit.plugins.transformers import Transformer
from guessit.matcher import GuessFinder
from guessit.patterns import sep
from guessit.containers import PropertiesContainer, WeakValidator, NoValidator
from guessit.patterns.numeral import numeral, digital_numeral, parse_numeral
from re import split as re_split
log = logging.getLogger(__name__)

def number_list(s):
l = [ int(n) for n in re.sub('[^0-9]+', ' ', s).split() ]

if len(l) == 2:
# it is an episode interval, return all numbers in between
return range(l[0], l[1]+1)

return l

def guess_episodes_rexps(string):
for rexp, confidence, span_adjust in episode_rexps:
match = re.search(rexp, string, re.IGNORECASE)
if match:
span = (match.start() + span_adjust[0],
match.end() + span_adjust[1])
guess = Guess(match.groupdict(), confidence=confidence, raw=string[span[0]:span[1]])

# decide whether we have only a single episode number or an
# episode list
if guess.get('episodeNumber'):
eplist = number_list(guess['episodeNumber'])
guess.set('episodeNumber', eplist[0], confidence=confidence, raw=string[span[0]:span[1]])

if len(eplist) > 1:
guess.set('episodeList', eplist, confidence=confidence, raw=string[span[0]:span[1]])

if guess.get('bonusNumber'):
eplist = number_list(guess['bonusNumber'])
guess.set('bonusNumber', eplist[0], confidence=confidence, raw=string[span[0]:span[1]])

return guess, span

return None, None


class GuessEpisodesRexps(Transformer):
def __init__(self):
Transformer.__init__(self, 20)

self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)

def episode_parser(value):
values = re_split('[a-zA-Z]', value)
values = [x for x in values if x]
ret = []
for letters_elt in values:
dashed_values = letters_elt.split('-')
dashed_values = [x for x in dashed_values if x]
if len(dashed_values) > 1:
for _ in range(0, len(dashed_values) - 1):
start_dash_ep = parse_numeral(dashed_values[0])
end_dash_ep = parse_numeral(dashed_values[1])
for dash_ep in range(start_dash_ep, end_dash_ep + 1):
ret.append(dash_ep)
else:
ret.append(parse_numeral(letters_elt))
if len(ret) > 1:
return {None: ret[0], 'episodeList': ret} # TODO: Should support seasonList also
elif len(ret) > 0:
return ret[0]
else:
return None

self.container.register_property(None, r'((?:season|saison)' + sep + '?(?P<season>' + numeral + '))', confidence=1.0, formatter=parse_numeral)
self.container.register_property(None, r'(s(?P<season>' + digital_numeral + ')[^0-9]?' + sep + '?(?P<episodeNumber>(?:e' + digital_numeral + '(?:' + sep + '?[e-]' + digital_numeral + ')*)))[^0-9]', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser}, validator=NoValidator())
self.container.register_property(None, r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<episodeNumber>(?:x' + digital_numeral + '(?:' + sep + '?[x-]' + digital_numeral + ')*)))[^0-9]', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser})
self.container.register_property(None, r'(s(?P<season>' + digital_numeral + '))[^0-9]', confidence=0.6, formatter=parse_numeral, validator=NoValidator())
self.container.register_property(None, r'((?P<episodeNumber>' + digital_numeral + ')v[23])', confidence=0.6, formatter=parse_numeral)
self.container.register_property(None, r'((?:ep)' + sep + r'(?P<episodeNumber>' + numeral + '))[^0-9]', confidence=0.7, formatter=parse_numeral)
self.container.register_property(None, r'(e(?P<episodeNumber>' + digital_numeral + '))', confidence=0.6, formatter=parse_numeral)

self.container.register_canonical_properties('other', 'FiNAL', 'Complete', validator=WeakValidator())

def supported_properties(self):
return ['episodeNumber', 'season']

def guess_episodes_rexps(self, string, node=None, options=None):
found = self.container.find_properties(string, node)
return self.container.as_guess(found, string)

def should_process(self, mtree, options=None):
return mtree.guess.get('type', '').startswith('episode')

def process(self, mtree, options=None):
GuessFinder(self.guess_episodes_rexps, None, self.log, options).process_nodes(mtree.unidentified_leaves())
def process(mtree):
SingleNodeGuesser(guess_episodes_rexps, None, log).process(mtree)
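The episode_parser registered above expands compound episode markers such as 'e01-03' or '01x02x03' into an episode list. A standalone sketch of that expansion (digits, dashes and letter separators only; not the library's exact implementation):

import re

def expand_episode_list(value):
    eps = []
    # split on letters (e, x, ...) and expand any dash intervals
    for part in [p for p in re.split('[a-zA-Z]', value) if p]:
        bounds = [int(b) for b in part.split('-') if b]
        if len(bounds) > 1:
            eps.extend(range(bounds[0], bounds[-1] + 1))
        else:
            eps.extend(bounds)
    return eps

print(expand_episode_list('01-03'))   # -> [1, 2, 3]
print(expand_episode_list('e01e02'))  # -> [1, 2]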
@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by

@@ -18,196 +18,182 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

import mimetypes
from __future__ import unicode_literals
from guessit import Guess
from guessit.patterns import (subtitle_exts, info_exts, video_exts, episode_rexps,
find_properties, compute_canonical_form)
from guessit.date import valid_year
from guessit.textutils import clean_string
import os.path
import re
import mimetypes
import logging

from guessit.guess import Guess
from guessit.patterns.extension import subtitle_exts, info_exts, video_exts
from guessit.transfo import TransformerException
from guessit.plugins.transformers import Transformer, get_transformer
from guessit.matcher import log_found_guess, found_guess
from guessit.textutils import clean_string
log = logging.getLogger(__name__)

# List of well known movies and series, hardcoded because they cannot be
# guessed appropriately otherwise
MOVIES = [ 'OSS 117' ]
SERIES = [ 'Band of Brothers' ]

class GuessFiletype(Transformer):
def __init__(self):
Transformer.__init__(self, 250)
MOVIES = [ m.lower() for m in MOVIES ]
SERIES = [ s.lower() for s in SERIES ]

# List of well known movies and series, hardcoded because they cannot be
# guessed appropriately otherwise
MOVIES = ['OSS 117']
SERIES = ['Band of Brothers']
def guess_filetype(mtree, filetype):
# put the filetype inside a dummy container to be able to have the
# following functions work correctly as closures
# this is a workaround for python 2 which doesn't have the
# 'nonlocal' keyword (python 3 does have it)
filetype_container = [filetype]
other = {}
filename = mtree.string

MOVIES = [m.lower() for m in MOVIES]
SERIES = [s.lower() for s in SERIES]
def upgrade_episode():
if filetype_container[0] == 'video':
filetype_container[0] = 'episode'
elif filetype_container[0] == 'subtitle':
filetype_container[0] = 'episodesubtitle'
elif filetype_container[0] == 'info':
filetype_container[0] = 'episodeinfo'

def guess_filetype(self, mtree, options=None):
options = options or {}
def upgrade_movie():
if filetype_container[0] == 'video':
filetype_container[0] = 'movie'
elif filetype_container[0] == 'subtitle':
filetype_container[0] = 'moviesubtitle'
elif filetype_container[0] == 'info':
filetype_container[0] = 'movieinfo'

# put the filetype inside a dummy container to be able to have the
# following functions work correctly as closures
# this is a workaround for python 2 which doesn't have the
# 'nonlocal' keyword which we could use here in the upgrade_* functions
# (python 3 does have it)
filetype_container = [mtree.guess.get('type')]
other = {}
filename = mtree.string

def upgrade_episode():
if filetype_container[0] == 'subtitle':
filetype_container[0] = 'episodesubtitle'
elif filetype_container[0] == 'info':
filetype_container[0] = 'episodeinfo'
elif not filetype_container[0]:
filetype_container[0] = 'episode'

def upgrade_movie():
if filetype_container[0] == 'subtitle':
filetype_container[0] = 'moviesubtitle'
elif filetype_container[0] == 'info':
filetype_container[0] = 'movieinfo'
elif not filetype_container[0]:
filetype_container[0] = 'movie'

def upgrade_subtitle():
if filetype_container[0] == 'movie':
filetype_container[0] = 'moviesubtitle'
elif filetype_container[0] == 'episode':
filetype_container[0] = 'episodesubtitle'
elif not filetype_container[0]:
filetype_container[0] = 'subtitle'

def upgrade_info():
if filetype_container[0] == 'movie':
filetype_container[0] = 'movieinfo'
elif filetype_container[0] == 'episode':
filetype_container[0] = 'episodeinfo'
elif not filetype_container[0]:
filetype_container[0] = 'info'

# look at the extension first
fileext = os.path.splitext(filename)[1][1:].lower()
if fileext in subtitle_exts:
upgrade_subtitle()
other = {'container': fileext}
elif fileext in info_exts:
upgrade_info()
other = {'container': fileext}
elif fileext in video_exts:
other = {'container': fileext}
def upgrade_subtitle():
if 'movie' in filetype_container[0]:
filetype_container[0] = 'moviesubtitle'
elif 'episode' in filetype_container[0]:
filetype_container[0] = 'episodesubtitle'
else:
if fileext and not options.get('name_only'):
other = {'extension': fileext}
filetype_container[0] = 'subtitle'

# check whether we are in a 'Movies', 'Tv Shows', ... folder
folder_rexps = [
(r'Movies?', upgrade_movie),
(r'Films?', upgrade_movie),
(r'Tv[ _-]?Shows?', upgrade_episode),
(r'Series?', upgrade_episode),
(r'Episodes?', upgrade_episode),
]
for frexp, upgrade_func in folder_rexps:
frexp = re.compile(frexp, re.IGNORECASE)
for pathgroup in mtree.children:
if frexp.match(pathgroup.value):
upgrade_func()
return filetype_container[0], other
def upgrade_info():
if 'movie' in filetype_container[0]:
filetype_container[0] = 'movieinfo'
elif 'episode' in filetype_container[0]:
filetype_container[0] = 'episodeinfo'
else:
filetype_container[0] = 'info'

# check for a few specific cases which will unintentionally make the
# following heuristics confused (eg: OSS 117 will look like an episode,
# season 1, epnum 17, when it is in fact a movie)
fname = clean_string(filename).lower()
for m in self.MOVIES:
if m in fname:
self.log.debug('Found in exception list of movies -> type = movie')
upgrade_movie()
return filetype_container[0], other
for s in self.SERIES:
if s in fname:
self.log.debug('Found in exception list of series -> type = episode')
upgrade_episode()
return filetype_container[0], other
def upgrade(type='unknown'):
if filetype_container[0] == 'autodetect':
filetype_container[0] = type

# now look whether there are some specific hints for episode vs movie
# if we have an episode_rexp (eg: s02e13), it is an episode
episode_transformer = get_transformer('guess_episodes_rexps')
if episode_transformer:
guess = episode_transformer.guess_episodes_rexps(filename)
if guess:
self.log.debug('Found guess_episodes_rexps: %s -> type = episode', guess)
upgrade_episode()
return filetype_container[0], other

properties_transformer = get_transformer('guess_properties')
if properties_transformer:
# if we have certain properties characteristic of episodes, it is an ep
found = properties_transformer.container.find_properties(filename, mtree, 'episodeFormat')
guess = properties_transformer.container.as_guess(found, filename)
if guess:
self.log.debug('Found characteristic property of episodes: %s"', guess)
upgrade_episode()
return filetype_container[0], other
# look at the extension first
fileext = os.path.splitext(filename)[1][1:].lower()
if fileext in subtitle_exts:
upgrade_subtitle()
other = { 'container': fileext }
elif fileext in info_exts:
upgrade_info()
other = { 'container': fileext }
elif fileext in video_exts:
upgrade(type='video')
other = { 'container': fileext }
else:
upgrade(type='unknown')
other = { 'extension': fileext }

found = properties_transformer.container.find_properties(filename, mtree, 'format')
guess = properties_transformer.container.as_guess(found, filename)
if guess and guess['format'] in ('HDTV', 'WEBRip', 'WEB-DL', 'DVB'):
# Use weak episodes only if TV or WEB source
weak_episode_transformer = get_transformer('guess_weak_episodes_rexps')
if weak_episode_transformer:
guess = weak_episode_transformer.guess_weak_episodes_rexps(filename)
if guess:
self.log.debug('Found guess_weak_episodes_rexps: %s -> type = episode', guess)
upgrade_episode()
return filetype_container[0], other

website_transformer = get_transformer('guess_website')
if website_transformer:
found = website_transformer.container.find_properties(filename, mtree, 'website')
guess = website_transformer.container.as_guess(found, filename)
if guess:
for namepart in ('tv', 'serie', 'episode'):
if namepart in guess['website']:
# origin-specific type
self.log.debug('Found characteristic property of episodes: %s', guess)
upgrade_episode()
return filetype_container[0], other

if filetype_container[0] in ('subtitle', 'info') or (not filetype_container[0] and fileext in video_exts):
# if no episode info found, assume it's a movie
self.log.debug('Nothing characteristic found, assuming type = movie')
# check whether we are in a 'Movies', 'Tv Shows', ... folder
folder_rexps = [ (r'Movies?', upgrade_movie),
(r'Tv[ _-]?Shows?', upgrade_episode),
(r'Series', upgrade_episode)
]
for frexp, upgrade_func in folder_rexps:
frexp = re.compile(frexp, re.IGNORECASE)
for pathgroup in mtree.children:
if frexp.match(pathgroup.value):
upgrade_func()

# check for a few specific cases which will unintentionally make the
# following heuristics confused (eg: OSS 117 will look like an episode,
# season 1, epnum 17, when it is in fact a movie)
fname = clean_string(filename).lower()
for m in MOVIES:
if m in fname:
log.debug('Found in exception list of movies -> type = movie')
upgrade_movie()
for s in SERIES:
if s in fname:
log.debug('Found in exception list of series -> type = episode')
upgrade_episode()

if not filetype_container[0]:
self.log.debug('Nothing characteristic found, assuming type = unknown')
filetype_container[0] = 'unknown'
# now look whether there are some specific hints for episode vs movie
if filetype_container[0] in ('video', 'subtitle', 'info'):
# if we have an episode_rexp (eg: s02e13), it is an episode
for rexp, _, _ in episode_rexps:
match = re.search(rexp, filename, re.IGNORECASE)
if match:
log.debug('Found matching regexp: "%s" (string = "%s") -> type = episode', rexp, match.group())
upgrade_episode()
break

return filetype_container[0], other
# if we have a 3-4 digit number that's not a year, maybe an episode
match = re.search(r'[^0-9]([0-9]{3,4})[^0-9]', filename)
if match:
fullnumber = int(match.group()[1:-1])
#season = fullnumber // 100
epnumber = fullnumber % 100
possible = True

def process(self, mtree, options=None):
"""guess the file type now (will be useful later)
"""
filetype, other = self.guess_filetype(mtree, options)
# check for validity
if epnumber > 40:
possible = False
if valid_year(fullnumber):
possible = False

mtree.guess.set('type', filetype, confidence=1.0)
log_found_guess(mtree.guess)
if possible:
log.debug('Found possible episode number: %s (from string "%s") -> type = episode', epnumber, match.group())
upgrade_episode()

filetype_info = Guess(other, confidence=1.0)
# guess the mimetype of the filename
# TODO: handle other mimetypes not found on the default type_maps
# mimetypes.types_map['.srt']='text/subtitle'
mime, _ = mimetypes.guess_type(mtree.string, strict=False)
if mime is not None:
filetype_info.update({'mimetype': mime}, confidence=1.0)
# if we have certain properties characteristic of episodes, it is an ep
for prop, value, _, _ in find_properties(filename):
log.debug('prop: %s = %s' % (prop, value))
if prop == 'episodeFormat':
log.debug('Found characteristic property of episodes: %s = "%s"', prop, value)
upgrade_episode()
break

node_ext = mtree.node_at((-1,))
found_guess(node_ext, filetype_info)
elif compute_canonical_form('format', value) == 'DVB':
log.debug('Found characteristic property of episodes: %s = "%s"', prop, value)
upgrade_episode()
break

if mtree.guess.get('type') in [None, 'unknown']:
if options.get('name_only'):
mtree.guess.set('type', 'movie', confidence=0.6)
else:
raise TransformerException(__name__, 'Unknown file type')
# origin-specific type
if 'tvu.org.ru' in filename:
log.debug('Found characteristic property of episodes: %s = "%s"', prop, value)
upgrade_episode()

# if no episode info found, assume it's a movie
log.debug('Nothing characteristic found, assuming type = movie')
upgrade_movie()

filetype = filetype_container[0]
return filetype, other


def process(mtree, filetype='autodetect'):
filetype, other = guess_filetype(mtree, filetype)

mtree.guess.set('type', filetype, confidence=1.0)
log.debug('Found with confidence %.2f: %s' % (1.0, mtree.guess))

filetype_info = Guess(other, confidence=1.0)
# guess the mimetype of the filename
# TODO: handle other mimetypes not found on the default type_maps
# mimetypes.types_map['.srt']='text/subtitle'
mime, _ = mimetypes.guess_type(mtree.string, strict=False)
if mime is not None:
filetype_info.update({'mimetype': mime}, confidence=1.0)

node_ext = mtree.node_at((-1,))
node_ext.guess = filetype_info
log.debug('Found with confidence %.2f: %s' % (1.0, node_ext.guess))
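The filetype pass first classifies by extension and then upgrades the result using hints such as a 'Movies' or 'Tv Shows' parent folder. A compressed, hypothetical sketch of that two-step idea (the extension sets are a small made-up subset):

import os.path

VIDEO_EXTS = {'mkv', 'avi', 'mp4'}
SUBTITLE_EXTS = {'srt', 'sub'}

def classify(path):
    ext = os.path.splitext(path)[1][1:].lower()
    if ext in SUBTITLE_EXTS:
        filetype = 'subtitle'
    elif ext in VIDEO_EXTS:
        filetype = 'video'
    else:
        filetype = 'unknown'
    # folder hint: a 'Movies' or 'Tv Shows' parent refines the guess
    parts = path.replace('\\', '/').lower().split('/')
    if filetype == 'video':
        if any(p.startswith('movie') for p in parts):
            filetype = 'movie'
        elif any(p.startswith('tv') for p in parts):
            filetype = 'episode'
    return filetype, ext

print(classify('Tv Shows/Some Show/S01E02.mkv'))  # -> ('episode', 'mkv')
print(classify('Movies/Some Film (2004).avi'))    # -> ('movie', 'avi')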
@@ -18,52 +18,54 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.plugins.transformers import Transformer
from guessit.matcher import GuessFinder
from __future__ import unicode_literals
from guessit.transfo import SingleNodeGuesser
from guessit.patterns import find_properties
import re
import logging

log = logging.getLogger(__name__)


class GuessIdnumber(Transformer):
def __init__(self):
Transformer.__init__(self, -180)

def supported_properties(self):
return ['idNumber']

_idnum = re.compile(r'(?P<idNumber>[a-zA-Z0-9-]{20,})') # 1.0, (0, 0))

def guess_idnumber(self, string, node=None, options=None):
match = self._idnum.search(string)
if match is not None:
result = match.groupdict()
switch_count = 0
DIGIT = 0
LETTER = 1
OTHER = 2
last = LETTER
for c in result['idNumber']:
if c in '0123456789':
ci = DIGIT
elif c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
ci = LETTER
else:
ci = OTHER

if ci != last:
switch_count += 1

last = ci

switch_ratio = float(switch_count) / len(result['idNumber'])

# only return the result as probable if we alternate often between
# char type (more likely for hash values than for common words)
if switch_ratio > 0.4:
return result, match.span()

def guess_properties(string):
try:
prop, value, pos, end = find_properties(string)[0]
return { prop: value }, (pos, end)
except IndexError:
return None, None

def process(self, mtree, options=None):
GuessFinder(self.guess_idnumber, 0.4, self.log, options).process_nodes(mtree.unidentified_leaves())
_idnum = re.compile(r'(?P<idNumber>[a-zA-Z0-9-]{10,})') # 1.0, (0, 0))

def guess_idnumber(string):
match = _idnum.search(string)
if match is not None:
result = match.groupdict()
switch_count = 0
DIGIT = 0
LETTER = 1
OTHER = 2
last = LETTER
for c in result['idNumber']:
if c in '0123456789':
ci = DIGIT
elif c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
ci = LETTER
else:
ci = OTHER

if ci != last:
switch_count += 1

last = ci

switch_ratio = float(switch_count) / len(result['idNumber'])

# only return the result as probable if we alternate often between
# char type (more likely for hash values than for common words)
if switch_ratio > 0.4:
return result, match.span()

return None, None

def process(mtree):
SingleNodeGuesser(guess_idnumber, 0.4, log).process(mtree)
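The idNumber heuristic shown in both versions above keeps a long alphanumeric token only if it switches often between digits and letters, which is typical of hashes and release ids but not of normal words. The same logic as a standalone sketch:

def looks_like_id(token):
    def kind(c):
        if c.isdigit():
            return 'digit'
        if c.isalpha():
            return 'letter'
        return 'other'

    switches = 0
    last = 'letter'
    for c in token:
        current = kind(c)
        if current != last:
            switches += 1
        last = current
    # alternating character classes in more than 40% of positions -> hash-like
    return float(switches) / len(token) > 0.4

print(looks_like_id('1A2b3C4d5E6f7G8h9I0j'))  # -> True
print(looks_like_id('somelongordinaryword'))  # -> False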
@ -2,7 +2,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
|
@ -18,152 +18,38 @@
|
|||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
from __future__ import unicode_literals
|
||||
from guessit import Guess
|
||||
from guessit.transfo import SingleNodeGuesser
|
||||
from guessit.language import search_language
|
||||
import logging
|
||||
|
||||
from guessit.language import search_language, subtitle_prefixes, subtitle_suffixes
|
||||
from guessit.patterns.extension import subtitle_exts
|
||||
from guessit.textutils import clean_string, find_words
|
||||
from guessit.plugins.transformers import Transformer
|
||||
from guessit.matcher import GuessFinder
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class GuessLanguage(Transformer):
|
||||
def __init__(self):
|
||||
Transformer.__init__(self, 30)
|
||||
def guess_language(string, node, skip=None):
|
||||
if skip:
|
||||
relative_skip = []
|
||||
for entry in skip:
|
||||
node_idx = entry['node_idx']
|
||||
span = entry['span']
|
||||
if node_idx == node.node_idx[:len(node_idx)]:
|
||||
relative_span = (span[0] - node.offset + 1, span[1] - node.offset + 1)
|
||||
relative_skip.append(relative_span)
|
||||
skip = relative_skip
|
||||
|
||||
def supported_properties(self):
|
||||
return ['language', 'subtitleLanguage']
|
||||
language, span, confidence = search_language(string, skip=skip)
|
||||
if language:
|
||||
return (Guess({'language': language},
|
||||
confidence=confidence,
|
||||
raw= string[span[0]:span[1]]),
|
||||
span)
|
||||
|
||||
def guess_language(self, string, node=None, options=None):
|
||||
guess = search_language(string)
|
||||
return guess
|
||||
return None, None
|
||||
|
||||
def _skip_language_on_second_pass(self, mtree, node):
|
||||
"""Check if found node is a valid language node, or if it's a false positive.
|
||||
guess_language.use_node = True
|
||||
|
||||
:param mtree: Tree detected on first pass.
|
||||
:type mtree: :class:`guessit.matchtree.MatchTree`
|
||||
:param node: Node that contains a language Guess
|
||||
:type node: :class:`guessit.matchtree.MatchTree`
|
||||
|
||||
:return: True if a second pass skipping this node is required
|
||||
:rtype: bool
|
||||
"""
|
||||
unidentified_starts = {}
|
||||
unidentified_ends = {}
|
||||
|
||||
property_starts = {}
|
||||
property_ends = {}
|
||||
|
||||
title_starts = {}
|
||||
title_ends = {}
|
||||
|
||||
for unidentified_node in mtree.unidentified_leaves():
|
||||
unidentified_starts[unidentified_node.span[0]] = unidentified_node
|
||||
unidentified_ends[unidentified_node.span[1]] = unidentified_node
|
||||
|
||||
for property_node in mtree.leaves_containing('year'):
property_starts[property_node.span[0]] = property_node
property_ends[property_node.span[1]] = property_node

for title_node in mtree.leaves_containing(['title', 'series']):
title_starts[title_node.span[0]] = title_node
title_ends[title_node.span[1]] = title_node

return node.span[0] in title_ends.keys() and (node.span[1] in unidentified_starts.keys() or node.span[1] + 1 in property_starts.keys()) or\
node.span[1] in title_starts.keys() and (node.span[0] == 0 or node.span[0] in unidentified_ends.keys() or node.span[0] in property_ends.keys())

def second_pass_options(self, mtree, options=None):
m = mtree.matched()
to_skip_language_nodes = []

for lang_key in ('language', 'subtitleLanguage'):
langs = {}
lang_nodes = set(n for n in mtree.leaves_containing(lang_key))

for lang_node in lang_nodes:
lang = lang_node.guess.get(lang_key, None)
if self._skip_language_on_second_pass(mtree, lang_node):
# Language probably split the title. Add to skip for 2nd pass.

# if filetype is subtitle and the language appears last, just before
# the extension, then it is likely a subtitle language
parts = clean_string(lang_node.root.value).split()
if (m.get('type') in ['moviesubtitle', 'episodesubtitle'] and
(parts.index(lang_node.value) == len(parts) - 2)):
continue

to_skip_language_nodes.append(lang_node)
elif not lang in langs:
langs[lang] = lang_node
else:
# The same language was found. Keep the more confident one,
# and add others to skip for 2nd pass.
existing_lang_node = langs[lang]
to_skip = None
if (existing_lang_node.guess.confidence('language') >=
lang_node.guess.confidence('language')):
# lang_node is to remove
to_skip = lang_node
else:
# existing_lang_node is to remove
langs[lang] = lang_node
to_skip = existing_lang_node
to_skip_language_nodes.append(to_skip)

if to_skip_language_nodes:
return {'skip_nodes': to_skip_language_nodes}
return None

def should_process(self, mtree, options=None):
options = options or {}
return 'nolanguage' not in options

def process(self, mtree, options=None):
GuessFinder(self.guess_language, None, self.log, options).process_nodes(mtree.unidentified_leaves())

def promote_subtitle(self, node):
node.guess.set('subtitleLanguage', node.guess['language'],
confidence=node.guess.confidence('language'))
del node.guess['language']

def post_process(self, mtree, options=None):
# 1- try to promote language to subtitle language where it makes sense
for node in mtree.nodes():
if 'language' not in node.guess:
continue

# - if we matched a language in a file with a sub extension and that
# the group is the last group of the filename, it is probably the
# language of the subtitle
# (eg: 'xxx.english.srt')
if (mtree.node_at((-1,)).value.lower() in subtitle_exts and
node == mtree.leaves()[-2]):
self.promote_subtitle(node)

# - if we find in the same explicit group
# a subtitle prefix before the language,
# or a subtitle suffix after the language,
# then upgrade the language
explicit_group = mtree.node_at(node.node_idx[:2])
group_str = explicit_group.value.lower()

for sub_prefix in subtitle_prefixes:
if (sub_prefix in find_words(group_str) and
0 <= group_str.find(sub_prefix) < (node.span[0] - explicit_group.span[0])):
self.promote_subtitle(node)

for sub_suffix in subtitle_suffixes:
if (sub_suffix in find_words(group_str) and
(node.span[0] - explicit_group.span[0]) < group_str.find(sub_suffix)):
self.promote_subtitle(node)

# - if a language is in an explicit group just preceded by "st",
# it is a subtitle language (eg: '...st[fr-eng]...')
try:
idx = node.node_idx
previous = mtree.node_at((idx[0], idx[1] - 1)).leaves()[-1]
if previous.value.lower()[-2:] == 'st':
self.promote_subtitle(node)
except IndexError:
pass
def process(mtree, *args, **kwargs):
SingleNodeGuesser(guess_language, None, log, *args, **kwargs).process(mtree)
# Note: 'language' is promoted to 'subtitleLanguage' in the post_process transfo

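The post-processing above promotes a matched language to a subtitle language when it sits directly before a subtitle extension (the 'xxx.english.srt' case). A minimal standalone sketch of that rule follows; the extension set and the dot-splitting are illustrative assumptions, not guessit's match-tree logic:

# Illustrative sketch only: the "language right before a subtitle extension"
# rule, without guessit's match tree. SUBTITLE_EXTS is an assumed subset.
SUBTITLE_EXTS = {'srt', 'sub', 'ssa', 'ass', 'idx'}

def classify_language_token(filename, language_token):
    parts = filename.lower().split('.')
    # subtitle extension at the end + language token just before it
    if len(parts) >= 2 and parts[-1] in SUBTITLE_EXTS and parts[-2] == language_token.lower():
        return 'subtitleLanguage'
    return 'language'

print(classify_language_token('xxx.english.srt', 'english'))    # subtitleLanguage
print(classify_language_token('movie.english.mkv', 'english'))  # language
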
@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by

@@ -18,160 +18,157 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals
from __future__ import unicode_literals
from guessit import Guess
import unicodedata
import logging

from guessit.plugins.transformers import Transformer
from guessit.matcher import found_property
from guessit import u
log = logging.getLogger(__name__)


class GuessMovieTitleFromPosition(Transformer):
def __init__(self):
Transformer.__init__(self, -200)
def process(mtree):
def found_property(node, name, value, confidence):
node.guess = Guess({ name: value },
confidence=confidence,
raw=value)
log.debug('Found with confidence %.2f: %s' % (confidence, node.guess))

def supported_properties(self):
return ['title']
def found_title(node, confidence):
found_property(node, 'title', node.clean_value, confidence)

def should_process(self, mtree, options=None):
options = options or {}
return not options.get('skip_title') and not mtree.guess.get('type', '').startswith('episode')
basename = mtree.node_at((-2,))
all_valid = lambda leaf: len(leaf.clean_value) > 0
basename_leftover = basename.unidentified_leaves(valid=all_valid)

def process(self, mtree, options=None):
"""
try to identify the remaining unknown groups by looking at their
position relative to other known elements
"""
basename = mtree.node_at((-2,))
all_valid = lambda leaf: len(leaf.clean_value) > 0
basename_leftover = basename.unidentified_leaves(valid=all_valid)
try:
folder = mtree.node_at((-3,))
folder_leftover = folder.unidentified_leaves()
except ValueError:
folder = None
folder_leftover = []

try:
log.debug('folder: %s' % folder_leftover)
log.debug('basename: %s' % basename_leftover)

# specific cases:
# if we find the same group both in the folder name and the filename,
# it's a good candidate for title
if (folder_leftover and basename_leftover and
folder_leftover[0].clean_value == basename_leftover[0].clean_value):

found_title(folder_leftover[0], confidence=0.8)
return

# specific cases:
# if the basename contains a number first followed by an unidentified
# group, and the folder only contains 1 unidentified one, then we have
# a series
# ex: Millenium Trilogy (2009)/(1)The Girl With The Dragon Tattoo(2009).mkv
try:
series = folder_leftover[0]
filmNumber = basename_leftover[0]
title = basename_leftover[1]

basename_leaves = basename.leaves()

num = int(filmNumber.clean_value)

log.debug('series: %s' % series.clean_value)
log.debug('title: %s' % title.clean_value)
if (series.clean_value != title.clean_value and
series.clean_value != filmNumber.clean_value and
basename_leaves.index(filmNumber) == 0 and
basename_leaves.index(title) == 1):

found_title(title, confidence=0.6)
found_property(series, 'filmSeries',
series.clean_value, confidence=0.6)
found_property(filmNumber, 'filmNumber',
num, confidence=0.6)
return
except Exception:
pass

# specific cases:
# - movies/tttttt (yyyy)/tttttt.ccc
try:
if mtree.node_at((-4, 0)).value.lower() == 'movies':
folder = mtree.node_at((-3,))
folder_leftover = folder.unidentified_leaves()
except ValueError:
folder = None
folder_leftover = []

self.log.debug('folder: %s' % u(folder_leftover))
self.log.debug('basename: %s' % u(basename_leftover))
# Note:too generic, might solve all the unittests as they all
# contain 'movies' in their path
#
#if containing_folder.is_leaf() and not containing_folder.guess:
# containing_folder.guess =
# Guess({ 'title': clean_string(containing_folder.value) },
# confidence=0.7)

# specific cases:
# if we find the same group both in the folder name and the filename,
# it's a good candidate for title
if (folder_leftover and basename_leftover and
folder_leftover[0].clean_value == basename_leftover[0].clean_value):
year_group = folder.first_leaf_containing('year')
groups_before = folder.previous_unidentified_leaves(year_group)

found_property(folder_leftover[0], 'title', confidence=0.8)
found_title(groups_before[0], confidence=0.8)
return

# specific cases:
# if the basename contains a number first followed by an unidentified
# group, and the folder only contains 1 unidentified one, then we have
# a series
# ex: Millenium Trilogy (2009)/(1)The Girl With The Dragon Tattoo(2009).mkv
try:
series = folder_leftover[0]
filmNumber = basename_leftover[0]
title = basename_leftover[1]
except Exception:
pass

basename_leaves = basename.leaves()
# if we have either format or videoCodec in the folder containing the file
# or one of its parents, then we should probably look for the title in
# there rather than in the basename
try:
props = mtree.previous_leaves_containing(mtree.children[-2],
[ 'videoCodec', 'format',
'language' ])
except IndexError:
props = []

num = int(filmNumber.clean_value)
if props:
group_idx = props[0].node_idx[0]
if all(g.node_idx[0] == group_idx for g in props):
# if they're all in the same group, take leftover info from there
leftover = mtree.node_at((group_idx,)).unidentified_leaves()

self.log.debug('series: %s' % series.clean_value)
self.log.debug('title: %s' % title.clean_value)
if (series.clean_value != title.clean_value and
series.clean_value != filmNumber.clean_value and
basename_leaves.index(filmNumber) == 0 and
basename_leaves.index(title) == 1):

found_property(title, 'title', confidence=0.6)
found_property(series, 'filmSeries', confidence=0.6)
found_property(filmNumber, 'filmNumber', num, confidence=0.6)
return
except Exception:
pass

# specific cases:
# - movies/tttttt (yyyy)/tttttt.ccc
try:
if mtree.node_at((-4, 0)).value.lower() == 'movies':
folder = mtree.node_at((-3,))

# Note:too generic, might solve all the unittests as they all
# contain 'movies' in their path
#
# if containing_folder.is_leaf() and not containing_folder.guess:
# containing_folder.guess =
# Guess({ 'title': clean_string(containing_folder.value) },
# confidence=0.7)

year_group = folder.first_leaf_containing('year')
groups_before = folder.previous_unidentified_leaves(year_group)

found_property(groups_before[0], 'title', confidence=0.8)
if leftover:
found_title(leftover[0], confidence=0.7)
return

except Exception:
pass
# look for title in basename if there are some remaining undidentified
# groups there
if basename_leftover:
title_candidate = basename_leftover[0]

# if we have either format or videoCodec in the folder containing the file
# or one of its parents, then we should probably look for the title in
# there rather than in the basename
try:
props = mtree.previous_leaves_containing(mtree.children[-2],
['videoCodec', 'format',
'language'])
except IndexError:
props = []
# if basename is only one word and the containing folder has at least
# 3 words in it, we should take the title from the folder name
# ex: Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi
# ex: Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi <-- TODO: gets caught here?
if (title_candidate.clean_value.count(' ') == 0 and
folder_leftover and
folder_leftover[0].clean_value.count(' ') >= 2):

if props:
group_idx = props[0].node_idx[0]
if all(g.node_idx[0] == group_idx for g in props):
# if they're all in the same group, take leftover info from there
leftover = mtree.node_at((group_idx,)).unidentified_leaves()

if leftover:
found_property(leftover[0], 'title', confidence=0.7)
return

# look for title in basename if there are some remaining unidentified
# groups there
if basename_leftover:
# if basename is only one word and the containing folder has at least
# 3 words in it, we should take the title from the folder name
# ex: Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi
# ex: Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi <-- TODO: gets caught here?
if (basename_leftover[0].clean_value.count(' ') == 0 and
folder_leftover and
folder_leftover[0].clean_value.count(' ') >= 2):

found_property(folder_leftover[0], 'title', confidence=0.7)
return

# if there are only many unidentified groups, take the first of which is
# not inside brackets or parentheses.
# ex: Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi
if basename_leftover[0].is_explicit():
for basename_leftover_elt in basename_leftover:
if not basename_leftover_elt.is_explicit():
found_property(basename_leftover_elt, 'title', confidence=0.8)
return

# if all else fails, take the first remaining unidentified group in the
# basename as title
found_property(basename_leftover[0], 'title', confidence=0.6)
found_title(folder_leftover[0], confidence=0.7)
return

# if there are no leftover groups in the basename, look in the folder name
if folder_leftover:
found_property(folder_leftover[0], 'title', confidence=0.5)
# if there are only 2 unidentified groups, the first of which is inside
# brackets or parentheses, we take the second one for the title:
# ex: Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi
if len(basename_leftover) == 2 and basename_leftover[0].is_explicit():
found_title(basename_leftover[1], confidence=0.8)
return

# if nothing worked, look if we have a very small group at the beginning
# of the basename
basename = mtree.node_at((-2,))
basename_leftover = basename.unidentified_leaves(valid=lambda leaf: True)
if basename_leftover:
found_property(basename_leftover[0], 'title', confidence=0.4)
return
# if all else fails, take the first remaining unidentified group in the
# basename as title
found_title(title_candidate, confidence=0.6)
return

# if there are no leftover groups in the basename, look in the folder name
if folder_leftover:
found_title(folder_leftover[0], confidence=0.5)
return

# if nothing worked, look if we have a very small group at the beginning
# of the basename
basename = mtree.node_at((-2,))
basename_leftover = basename.unidentified_leaves(valid=lambda leaf: True)
if basename_leftover:
found_title(basename_leftover[0], confidence=0.4)
return

@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by

@@ -18,213 +18,21 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals
from __future__ import unicode_literals
from guessit.transfo import SingleNodeGuesser
from guessit.patterns import find_properties
import logging

from guessit.containers import PropertiesContainer, WeakValidator, LeavesValidator, QualitiesContainer
from guessit.patterns.extension import subtitle_exts, video_exts, info_exts
from guessit.plugins.transformers import Transformer
from guessit.matcher import GuessFinder
log = logging.getLogger(__name__)


class GuessProperties(Transformer):
def __init__(self):
Transformer.__init__(self, 35)
def guess_properties(string):
try:
prop, value, pos, end = find_properties(string)[0]
return { prop: value }, (pos, end)
except IndexError:
return None, None

self.container = PropertiesContainer()
self.qualities = QualitiesContainer()

def register_property(propname, props):
"""props a dict of {value: [patterns]}"""
for canonical_form, patterns in props.items():
if isinstance(patterns, tuple):
patterns2, kwargs = patterns
kwargs = dict(kwargs)
kwargs['canonical_form'] = canonical_form
self.container.register_property(propname, *patterns2, **kwargs)

else:
self.container.register_property(propname, *patterns, canonical_form=canonical_form)

def register_quality(propname, quality_dict):
"""props a dict of {canonical_form: quality}"""
for canonical_form, quality in quality_dict.items():
self.qualities.register_quality(propname, canonical_form, quality)

register_property('container', {'mp4': ['MP4']})

# http://en.wikipedia.org/wiki/Pirated_movie_release_types
register_property('format', {'VHS': ['VHS'],
'Cam': ['CAM', 'CAMRip'],
'Telesync': ['TELESYNC', 'PDVD'],
'Telesync': (['TS'], {'confidence': 0.2}),
'Workprint': ['WORKPRINT', 'WP'],
'Telecine': ['TELECINE', 'TC'],
'PPV': ['PPV', 'PPV-Rip'], # Pay Per View
'TV': ['SD-TV', 'SD-TV-Rip', 'Rip-SD-TV', 'TV-Rip', 'Rip-TV'],
'DVB': ['DVB-Rip', 'DVB', 'PD-TV'],
'DVD': ['DVD', 'DVD-Rip', 'VIDEO-TS'],
'HDTV': ['HD-TV', 'TV-RIP-HD', 'HD-TV-RIP'],
'VOD': ['VOD', 'VOD-Rip'],
'WEBRip': ['WEB-Rip'],
'WEB-DL': ['WEB-DL'],
'HD-DVD': ['HD-(?:DVD)?-Rip', 'HD-DVD'],
'BluRay': ['Blu-ray', 'B[DR]', 'B[DR]-Rip', 'BD[59]', 'BD25', 'BD50']
})

register_quality('format', {'VHS': -100,
'Cam': -90,
'Telesync': -80,
'Workprint': -70,
'Telecine': -60,
'PPV': -50,
'TV': -30,
'DVB': -20,
'DVD': 0,
'HDTV': 20,
'VOD': 40,
'WEBRip': 50,
'WEB-DL': 60,
'HD-DVD': 80,
'BluRay': 100
})

register_property('screenSize', {'360p': ['(?:\d{3,}(?:\\|\/|x|\*))?360(?:i|p?x?)'],
'368p': ['(?:\d{3,}(?:\\|\/|x|\*))?368(?:i|p?x?)'],
'480p': ['(?:\d{3,}(?:\\|\/|x|\*))?480(?:i|p?x?)'],
'480p': (['hr'], {'confidence': 0.2}),
'576p': ['(?:\d{3,}(?:\\|\/|x|\*))?576(?:i|p?x?)'],
'720p': ['(?:\d{3,}(?:\\|\/|x|\*))?720(?:i|p?x?)'],
'900p': ['(?:\d{3,}(?:\\|\/|x|\*))?900(?:i|p?x?)'],
'1080i': ['(?:\d{3,}(?:\\|\/|x|\*))?1080i'],
'1080p': ['(?:\d{3,}(?:\\|\/|x|\*))?1080(?:p?x?)'],
'4K': ['(?:\d{3,}(?:\\|\/|x|\*))?2160(?:i|p?x?)']
})

register_quality('screenSize', {'360p': -300,
'368p': -200,
'480p': -100,
'576p': 0,
'720p': 100,
'900p': 130,
'1080i': 180,
'1080p': 200,
'4K': 400
})

_videoCodecProperty = {'Real': ['Rv\d{2}'], # http://en.wikipedia.org/wiki/RealVideo
'Mpeg2': ['Mpeg2'],
'DivX': ['DVDivX', 'DivX'],
'XviD': ['XviD'],
'h264': ['[hx]-264(?:-AVC)?', 'MPEG-4(?:-AVC)'],
'h265': ['[hx]-265(?:-HEVC)?', 'HEVC']
}

register_property('videoCodec', _videoCodecProperty)

register_quality('videoCodec', {'Real': -50,
'Mpeg2': -30,
'DivX': -10,
'XviD': 0,
'h264': 100,
'h265': 150
})

# http://blog.mediacoderhq.com/h264-profiles-and-levels/
# http://fr.wikipedia.org/wiki/H.264
self.container.register_property('videoProfile', 'BP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
self.container.register_property('videoProfile', 'XP', 'EP', canonical_form='XP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
self.container.register_property('videoProfile', 'MP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
self.container.register_property('videoProfile', 'HP', 'HiP', canonical_form='HP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
self.container.register_property('videoProfile', '10.?bit', 'Hi10P', canonical_form='10bit', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
self.container.register_property('videoProfile', 'Hi422P', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
self.container.register_property('videoProfile', 'Hi444PP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))

register_quality('videoProfile', {'BP': -20,
'XP': -10,
'MP': 0,
'HP': 10,
'10bit': 15,
'Hi422P': 25,
'Hi444PP': 35
})

# has nothing to do here (or on filenames for that matter), but some
# releases use it and it helps to identify release groups, so we adapt
register_property('videoApi', {'DXVA': ['DXVA']})

register_property('audioCodec', {'MP3': ['MP3'],
'DolbyDigital': ['DD'],
'AAC': ['AAC'],
'AC3': ['AC3'],
'Flac': ['FLAC'],
'DTS': ['DTS'],
'TrueHD': ['True-HD']
})

register_quality('audioCodec', {'MP3': 10,
'DolbyDigital': 30,
'AAC': 35,
'AC3': 40,
'Flac': 45,
'DTS': 60,
'TrueHD': 70
})

self.container.register_property('audioProfile', 'HD', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'DTS']))
self.container.register_property('audioProfile', 'HD-MA', canonical_form='HDMA', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'DTS']))
self.container.register_property('audioProfile', 'HE', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'AAC']))
self.container.register_property('audioProfile', 'LC', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'AAC']))
self.container.register_property('audioProfile', 'HQ', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'AC3']))

register_quality('audioProfile', {'HD': 20,
'HDMA': 50,
'LC': 0,
'HQ': 0,
'HE': 20
})

register_property('audioChannels', {'7.1': ['7[\W_]1', '7ch'],
'5.1': ['5[\W_]1', '5ch'],
'2.0': ['2[\W_]0', '2ch', 'stereo'],
'1.0': ['1[\W_]0', '1ch', 'mono']
})

register_quality('audioChannels', {'7.1': 200,
'5.1': 100,
'2.0': 0,
'1.0': -100
})

self.container.register_property('episodeFormat', r'Minisodes?', canonical_form='Minisode')

register_property('other', {'AudioFix': ['Audio-Fix', 'Audio-Fixed'],
'SyncFix': ['Sync-Fix', 'Sync-Fixed'],
'DualAudio': ['Dual-Audio'],
'WideScreen': ['ws', 'wide-screen'],
})

self.container.register_property('other', 'Real', 'Fix', canonical_form="Proper", validator=WeakValidator())
self.container.register_property('other', 'Proper', 'Repack', 'Rerip', canonical_form="Proper")

self.container.register_canonical_properties('other', 'R5', 'Screener', '3D', 'HD', 'HQ', 'DDC')
self.container.register_canonical_properties('other', 'Limited', 'Complete', 'Classic', 'Unrated', 'LiNE', 'Bonus', 'Trailer', validator=WeakValidator())

for prop in self.container.get_properties('format'):
self.container.register_property('other', prop.pattern + '(-?Scr(?:eener)?)', canonical_form='Screener')

for exts in (subtitle_exts, info_exts, video_exts):
for container in exts:
self.container.register_property('container', container, confidence=0.3)

def guess_properties(self, string, node=None, options=None):
found = self.container.find_properties(string, node)
return self.container.as_guess(found, string)

def supported_properties(self):
return self.container.get_supported_properties()

def process(self, mtree, options=None):
GuessFinder(self.guess_properties, 1.0, self.log, options).process_nodes(mtree.unidentified_leaves())

def rate_quality(self, guess, *props):
return self.qualities.rate_quality(guess, *props)
def process(mtree):
SingleNodeGuesser(guess_properties, 1.0, log).process(mtree)

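The screenSize patterns registered above accept an optional '<width>x' (or '\', '/', '*') prefix before the height. A quick standalone check of the 720p pattern with the standard re module (the pattern string is copied from the table above; this bypasses guessit's PropertiesContainer entirely):

import re

# 720p pattern as registered above, with an optional "<width>x" style prefix
pattern = re.compile(r'(?:\d{3,}(?:\\|\/|x|\*))?720(?:i|p?x?)', re.IGNORECASE)

for candidate in ('720p', '1280x720', '1280*720p', '1080p'):
    print(candidate, bool(pattern.fullmatch(candidate)))
# -> the first three match, '1080p' does not
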
@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by

@@ -18,132 +18,69 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals
from __future__ import unicode_literals
from guessit.transfo import SingleNodeGuesser
from guessit.patterns import prop_multi, compute_canonical_form, _dash, _psep
import re
import logging

from guessit.plugins.transformers import Transformer
from guessit.matcher import GuessFinder, found_property, found_guess
from guessit.containers import PropertiesContainer
from guessit.patterns import sep
from guessit.guess import Guess
from guessit.textutils import strip_brackets
log = logging.getLogger(__name__)

def get_patterns(property_name):
return [ p.replace(_dash, _psep) for patterns in prop_multi[property_name].values() for p in patterns ]

CODECS = get_patterns('videoCodec')
FORMATS = get_patterns('format')
VAPIS = get_patterns('videoApi')

# RG names following a codec or format, with a potential space or dash inside the name
GROUP_NAMES = [ r'(?P<videoCodec>' + codec + r')[ \.-](?P<releaseGroup>.+?([- \.].*?)??)[ \.]'
for codec in CODECS ]
GROUP_NAMES += [ r'(?P<format>' + fmt + r')[ \.-](?P<releaseGroup>.+?([- \.].*?)??)[ \.]'
for fmt in FORMATS ]
GROUP_NAMES += [ r'(?P<videoApi>' + api + r')[ \.-](?P<releaseGroup>.+?([- \.].*?)??)[ \.]'
for api in VAPIS ]

GROUP_NAMES2 = [ r'\.(?P<videoCodec>' + codec + r')-(?P<releaseGroup>.*?)(-(.*?))?[ \.]'
for codec in CODECS ]
GROUP_NAMES2 += [ r'\.(?P<format>' + fmt + r')-(?P<releaseGroup>.*?)(-(.*?))?[ \.]'
for fmt in FORMATS ]
GROUP_NAMES2 += [ r'\.(?P<videoApi>' + vapi + r')-(?P<releaseGroup>.*?)(-(.*?))?[ \.]'
for vapi in VAPIS ]

GROUP_NAMES = [ re.compile(r, re.IGNORECASE) for r in GROUP_NAMES ]
GROUP_NAMES2 = [ re.compile(r, re.IGNORECASE) for r in GROUP_NAMES2 ]

def adjust_metadata(md):
return dict((property_name, compute_canonical_form(property_name, value) or value)
for property_name, value in md.items())


class GuessReleaseGroup(Transformer):
def __init__(self):
Transformer.__init__(self, -190)
self.container = PropertiesContainer(canonical_from_pattern=False)
self._allowed_groupname_pattern = '[\w@#€£$&]'
self._forbidden_groupname_lambda = [lambda elt: elt in ['rip', 'by', 'for', 'par', 'pour', 'bonus'],
lambda elt: self._is_number(elt),
]
# If the previous property in this list, the match will be considered as safe
# and group name can contain a separator.
self.previous_safe_properties = ['videoCodec', 'format', 'videoApi', 'audioCodec', 'audioProfile', 'videoProfile', 'audioChannels']
def guess_release_group(string):
# first try to see whether we have both a known codec and a known release group
for rexp in GROUP_NAMES:
match = rexp.search(string)
while match:
metadata = match.groupdict()
# make sure this is an actual release group we caught
release_group = (compute_canonical_form('releaseGroup', metadata['releaseGroup']) or
compute_canonical_form('weakReleaseGroup', metadata['releaseGroup']))
if release_group:
return adjust_metadata(metadata), (match.start(1), match.end(2))

self.container.sep_replace_char = '-'
self.container.canonical_from_pattern = False
self.container.enhance = True
self.container.register_property('releaseGroup', self._allowed_groupname_pattern + '+')
self.container.register_property('releaseGroup', self._allowed_groupname_pattern + '+-' + self._allowed_groupname_pattern + '+')
# we didn't find anything conclusive, keep searching
match = rexp.search(string, match.span()[0]+1)

def supported_properties(self):
return self.container.get_supported_properties()
# pick anything as releaseGroup as long as we have a codec in front
# this doesn't include a potential dash ('-') ending the release group
# eg: [...].X264-HiS@SiLUHD-English.[...]
for rexp in GROUP_NAMES2:
match = rexp.search(string)
if match:
return adjust_metadata(match.groupdict()), (match.start(1), match.end(2))

def _is_number(self, s):
try:
int(s)
return True
except ValueError:
return False
return None, None

def validate_group_name(self, guess):
val = guess['releaseGroup']
if len(val) >= 2:

if '-' in val:
checked_val = ""
for elt in val.split('-'):
forbidden = False
for forbidden_lambda in self._forbidden_groupname_lambda:
forbidden = forbidden_lambda(elt.lower())
if forbidden:
break
if not forbidden:
if checked_val:
checked_val += '-'
checked_val += elt
else:
break
val = checked_val
if not val:
return False
guess['releaseGroup'] = val

forbidden = False
for forbidden_lambda in self._forbidden_groupname_lambda:
forbidden = forbidden_lambda(val.lower())
if forbidden:
break
if not forbidden:
return True
return False

def is_leaf_previous(self, leaf, node):
if leaf.span[1] <= node.span[0]:
for idx in range(leaf.span[1], node.span[0]):
if not leaf.root.value[idx] in sep:
return False
return True
return False

def guess_release_group(self, string, node=None, options=None):
found = self.container.find_properties(string, node, 'releaseGroup')
guess = self.container.as_guess(found, string, self.validate_group_name, sep_replacement='-')
validated_guess = None
if guess:
explicit_group_node = node.group_node()
if explicit_group_node:
for leaf in explicit_group_node.leaves_containing(self.previous_safe_properties):
if self.is_leaf_previous(leaf, node):
if leaf.root.value[leaf.span[1]] == '-':
guess.metadata().confidence = 1
else:
guess.metadata().confidence = 0.7
validated_guess = guess

if not validated_guess:
# If previous group last leaf is identified as a safe property,
# consider the raw value as a releaseGroup
previous_group_node = node.previous_group_node()
if previous_group_node:
for leaf in previous_group_node.leaves_containing(self.previous_safe_properties):
if self.is_leaf_previous(leaf, node):
guess = Guess({'releaseGroup': node.value}, confidence=1, input=node.value, span=(0, len(node.value)))
if self.validate_group_name(guess):
node.guess = guess
validated_guess = guess

if validated_guess:
# If following group nodes have only one unidentified leaf, it belongs to the release group
next_group_node = node

while True:
next_group_node = next_group_node.next_group_node()
if next_group_node:
leaves = next_group_node.leaves()
if len(leaves) == 1 and not leaves[0].guess:
validated_guess['releaseGroup'] = validated_guess['releaseGroup'] + leaves[0].value
leaves[0].guess = validated_guess
else:
break
else:
break

if validated_guess:
# Strip brackets
validated_guess['releaseGroup'] = strip_brackets(validated_guess['releaseGroup'])

return validated_guess

def process(self, mtree, options=None):
GuessFinder(self.guess_release_group, None, self.log, options).process_nodes(mtree.unidentified_leaves())
def process(mtree):
SingleNodeGuesser(guess_release_group, 0.8, log).process(mtree)

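The GROUP_NAMES2 patterns above take whatever follows a known codec as the release group, stopping before a trailing dash (see the '[...].X264-HiS@SiLUHD-English.[...]' comment). A rough standalone illustration with a single hard-coded codec; the real lists are built from prop_multi, which is not reproduced here:

import re

# one instance of the GROUP_NAMES2 shape, codec fixed to X264 for illustration
rexp = re.compile(r'\.(?P<videoCodec>X-?264)-(?P<releaseGroup>.*?)(-(.*?))?[ \.]',
                  re.IGNORECASE)

m = rexp.search('Movie.2010.720p.X264-HiS@SiLUHD-English.mkv')
if m:
    print(m.group('videoCodec'), m.group('releaseGroup'))
# -> X264 HiS@SiLUHD
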
@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by

@@ -18,41 +18,33 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, \
unicode_literals
from __future__ import unicode_literals
from guessit import Guess
from guessit.transfo import SingleNodeGuesser
from guessit.patterns import video_rexps, sep
import re
import logging

from guessit.patterns import _psep
from guessit.containers import PropertiesContainer
from guessit.plugins.transformers import Transformer
from guessit.matcher import GuessFinder
from guessit.patterns.numeral import parse_numeral
log = logging.getLogger(__name__)


class GuessVideoRexps(Transformer):
def __init__(self):
Transformer.__init__(self, 25)
def guess_video_rexps(string):
string = '-' + string + '-'
for rexp, confidence, span_adjust in video_rexps:
match = re.search(sep + rexp + sep, string, re.IGNORECASE)
if match:
metadata = match.groupdict()
# is this the better place to put it? (maybe, as it is at least
# the soonest that we can catch it)
if metadata.get('cdNumberTotal', -1) is None:
del metadata['cdNumberTotal']
span = (match.start() + span_adjust[0],
match.end() + span_adjust[1] - 2)
return (Guess(metadata, confidence=confidence, raw=string[span[0]:span[1]]),
span)

self.container = PropertiesContainer(canonical_from_pattern=False)
return None, None

self.container.register_property(None, 'cd' + _psep + '(?P<cdNumber>[0-9])(?:' + _psep + 'of' + _psep + '(?P<cdNumberTotal>[0-9]))?', confidence=1.0, enhance=False, global_span=True, formatter=parse_numeral)
self.container.register_property('cdNumberTotal', '([1-9])' + _psep + 'cds?', confidence=0.9, enhance=False, formatter=parse_numeral)

self.container.register_property('bonusNumber', 'x([0-9]{1,2})', enhance=False, global_span=True, formatter=parse_numeral)

self.container.register_property('filmNumber', 'f([0-9]{1,2})', enhance=False, global_span=True, formatter=parse_numeral)

self.container.register_property('edition', 'collector', 'collector-edition', 'edition-collector', canonical_form='Collector Edition')
self.container.register_property('edition', 'special-edition', 'edition-special', canonical_form='Special Edition')
self.container.register_property('edition', 'criterion', 'criterion-edition', 'edition-criterion', canonical_form='Criterion Edition')
self.container.register_property('edition', 'deluxe', 'cdeluxe-edition', 'edition-deluxe', canonical_form='Deluxe Edition')
self.container.register_property('edition', 'director\'?s?-cut', 'director\'?s?-cut-edition', 'edition-director\'?s?-cut', canonical_form='Director\'s cut')

def supported_properties(self):
return self.container.get_supported_properties()

def guess_video_rexps(self, string, node=None, options=None):
found = self.container.find_properties(string, node)
return self.container.as_guess(found, string)

def process(self, mtree, options=None):
GuessFinder(self.guess_video_rexps, None, self.log, options).process_nodes(mtree.unidentified_leaves())
def process(mtree):
SingleNodeGuesser(guess_video_rexps, None, log).process(mtree)

@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by

@@ -18,52 +18,45 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals
from __future__ import unicode_literals
from guessit import Guess
from guessit.transfo import SingleNodeGuesser
from guessit.patterns import weak_episode_rexps
import re
import logging

from guessit.plugins.transformers import Transformer
from guessit.matcher import GuessFinder
from guessit.patterns import sep
from guessit.containers import PropertiesContainer
from guessit.patterns.numeral import numeral, parse_numeral
from guessit.date import valid_year
log = logging.getLogger(__name__)


class GuessWeakEpisodesRexps(Transformer):
def __init__(self):
Transformer.__init__(self, 15)
def guess_weak_episodes_rexps(string, node):
if 'episodeNumber' in node.root.info:
return None, None

self.properties = PropertiesContainer(enhance=False, canonical_from_pattern=False)
for rexp, span_adjust in weak_episode_rexps:
match = re.search(rexp, string, re.IGNORECASE)
if match:
metadata = match.groupdict()
span = (match.start() + span_adjust[0],
match.end() + span_adjust[1])

def _formater(episodeNumber):
epnum = parse_numeral(episodeNumber)
if not valid_year(epnum):
if epnum > 100:
season, epnum = epnum // 100, epnum % 100
# episodes which have a season > 50 are most likely errors
# (Simpson is at 25!)
if season > 50:
return None
return {'season': season, 'episodeNumber': epnum}
else:
return epnum
epnum = int(metadata['episodeNumber'])
if epnum > 100:
season, epnum = epnum // 100, epnum % 100
# episodes which have a season > 25 are most likely errors
# (Simpsons is at 23!)
if season > 25:
continue
return Guess({ 'season': season,
'episodeNumber': epnum },
confidence=0.6, raw=string[span[0]:span[1]]), span
else:
return Guess(metadata, confidence=0.3, raw=string[span[0]:span[1]]), span

self.properties.register_property(['episodeNumber', 'season'], '[0-9]{2,4}', confidence=0.6, formatter=_formater)
self.properties.register_property('episodeNumber', '(?:episode)' + sep + '(' + numeral + ')[^0-9]', confidence=0.3)
return None, None

def supported_properties(self):
return self.properties.get_supported_properties()

def guess_weak_episodes_rexps(self, string, node=None, options=None):
if node and 'episodeNumber' in node.root.info:
return None
guess_weak_episodes_rexps.use_node = True

properties = self.properties.find_properties(string, node)
guess = self.properties.as_guess(properties, string)

return guess

def should_process(self, mtree, options=None):
return mtree.guess.get('type', '').startswith('episode')

def process(self, mtree, options=None):
GuessFinder(self.guess_weak_episodes_rexps, 0.6, self.log, options).process_nodes(mtree.unidentified_leaves())
def process(mtree):
SingleNodeGuesser(guess_weak_episodes_rexps, 0.6, log).process(mtree)

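Both versions above fold a bare 2-4 digit number into season and episode when it exceeds 100 (2305 becomes season 23, episode 5) and reject improbably high seasons; the newer code caps the season at 50 where the old one used 25. A tiny self-contained version of that arithmetic, with the cap as a parameter:

def split_weak_episode(number, max_season=50):
    """Split e.g. 2305 -> (23, 5); None when the implied season looks like an error."""
    if number > 100:
        season, episode = divmod(number, 100)
        if season > max_season:
            return None
        return season, episode
    return None, number  # plain episode number, no season information

print(split_weak_episode(2305))  # (23, 5)
print(split_weak_episode(42))    # (None, 42)
print(split_weak_episode(9901))  # None
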
@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by

@@ -18,49 +18,22 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, \
unicode_literals
from __future__ import unicode_literals
from guessit.transfo import SingleNodeGuesser
from guessit.patterns import websites
import logging

from guessit.patterns import build_or_pattern
from guessit.containers import PropertiesContainer
from guessit.plugins.transformers import Transformer
from guessit.matcher import GuessFinder
from pkg_resources import resource_stream  # @UnresolvedImport
log = logging.getLogger(__name__)


class GuessWebsite(Transformer):
def __init__(self):
Transformer.__init__(self, 45)
def guess_website(string):
low = string.lower()
for site in websites:
pos = low.find(site.lower())
if pos != -1:
return {'website': site}, (pos, pos + len(site))
return None, None

self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)

tlds = []

f = resource_stream('guessit', 'tlds-alpha-by-domain.txt')
f.readline()
next(f)
for tld in f:
tld = tld.strip()
if b'--' in tld:
continue
tlds.append(tld.decode("utf-8"))
f.close()

tlds_pattern = build_or_pattern(tlds)  # All registered domain extension
safe_tlds_pattern = build_or_pattern(['com', 'org', 'net'])  # For sure a website extension
safe_subdomains_pattern = build_or_pattern(['www'])  # For sure a website subdomain
safe_prefix_tlds_pattern = build_or_pattern(['co', 'com', 'org', 'net'])  # Those words before a tlds are sure

self.container.register_property('website', '(?:' + safe_subdomains_pattern + '\.)+' + r'(?:[a-z-]+\.)+' + r'(?:' + tlds_pattern + r')+')
self.container.register_property('website', '(?:' + safe_subdomains_pattern + '\.)*' + r'[a-z-]+\.' + r'(?:' + safe_tlds_pattern + r')+')
self.container.register_property('website', '(?:' + safe_subdomains_pattern + '\.)*' + r'[a-z-]+\.' + r'(?:' + safe_prefix_tlds_pattern + r'\.)+' + r'(?:' + tlds_pattern + r')+')

def supported_properties(self):
return self.container.get_supported_properties()

def guess_website(self, string, node=None, options=None):
found = self.container.find_properties(string, node, 'website')
return self.container.as_guess(found, string)

def process(self, mtree, options=None):
GuessFinder(self.guess_website, 1.0, self.log, options).process_nodes(mtree.unidentified_leaves())
def process(mtree):
SingleNodeGuesser(guess_website, 1.0, log).process(mtree)

@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by

@@ -18,32 +18,33 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.plugins.transformers import Transformer
from guessit.matcher import GuessFinder
from __future__ import unicode_literals
from guessit.transfo import SingleNodeGuesser
from guessit.date import search_year
import logging

log = logging.getLogger(__name__)


class GuessYear(Transformer):
def __init__(self):
Transformer.__init__(self, -160)
def guess_year(string):
year, span = search_year(string)
if year:
return { 'year': year }, span
else:
return None, None

def supported_properties(self):
return ['year']
def guess_year_skip_first(string):
year, span = search_year(string)
if year:
year2, span2 = guess_year(string[span[1]:])
if year2:
return year2, (span2[0]+span[1], span2[1]+span[1])

def guess_year(self, string, node=None, options=None):
year, span = search_year(string)
if year:
return {'year': year}, span
else:
return None, None
return None, None

def second_pass_options(self, mtree, options=None):
year_nodes = mtree.leaves_containing('year')
if len(year_nodes) > 1:
return {'skip_nodes': year_nodes[:len(year_nodes) - 1]}
return None

def process(self, mtree, options=None):
GuessFinder(self.guess_year, 1.0, self.log, options).process_nodes(mtree.unidentified_leaves())
def process(mtree, skip_first_year=False):
if skip_first_year:
SingleNodeGuesser(guess_year_skip_first, 1.0, log).process(mtree)
else:
SingleNodeGuesser(guess_year, 1.0, log).process(mtree)

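The old guess_year_skip_first helper above looks for a second year after the first match and shifts the second span back into the original string's coordinates. A short illustration of that span arithmetic; search_year here is a simplified stand-in for guessit.date.search_year, not the real implementation:

import re

def search_year(string):
    # stand-in: first 19xx/20xx group and its span
    m = re.search(r'(?:19|20)\d{2}', string)
    return (int(m.group()), m.span()) if m else (None, None)

def guess_year_skip_first(string):
    # skip the first year, keep the second, re-based onto the full string
    year, span = search_year(string)
    if year:
        year2, span2 = search_year(string[span[1]:])
        if year2:
            return year2, (span2[0] + span[1], span2[1] + span[1])
    return None, None

print(guess_year_skip_first('1984.Remastered.2010.BluRay'))  # (2010, (16, 20))
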
lib/guessit/transfo/post_process.py (new file, 73 lines)

@@ -0,0 +1,73 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import unicode_literals
from guessit.patterns import subtitle_exts
from guessit.textutils import reorder_title, find_words
import logging

log = logging.getLogger(__name__)


def process(mtree):
# 1- try to promote language to subtitle language where it makes sense
for node in mtree.nodes():
if 'language' not in node.guess:
continue

def promote_subtitle():
# pylint: disable=W0631
node.guess.set('subtitleLanguage', node.guess['language'],
confidence=node.guess.confidence('language'))
del node.guess['language']

# - if we matched a language in a file with a sub extension and that
# the group is the last group of the filename, it is probably the
# language of the subtitle
# (eg: 'xxx.english.srt')
if (mtree.node_at((-1,)).value.lower() in subtitle_exts and
node == mtree.leaves()[-2]):
promote_subtitle()

# - if we find the word 'sub' before the language, and in the same explicit
# group, then upgrade the language
explicit_group = mtree.node_at(node.node_idx[:2])
group_str = explicit_group.value.lower()

if ('sub' in find_words(group_str) and
0 <= group_str.find('sub') < (node.span[0] - explicit_group.span[0])):
promote_subtitle()

# - if a language is in an explicit group just preceded by "st",
# it is a subtitle language (eg: '...st[fr-eng]...')
try:
idx = node.node_idx
previous = mtree.node_at((idx[0], idx[1] - 1)).leaves()[-1]
if previous.value.lower()[-2:] == 'st':
promote_subtitle()
except IndexError:
pass

# 2- ", the" at the end of a series title should be prepended to it
for node in mtree.nodes():
if 'series' not in node.guess:
continue

node.guess['series'] = reorder_title(node.guess['series'])
@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by

@@ -18,32 +18,27 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.plugins.transformers import Transformer
from __future__ import unicode_literals
from guessit.textutils import find_first_level_groups
from guessit.patterns import group_delimiters
from functools import reduce
import functools
import logging

log = logging.getLogger(__name__)


class SplitExplicitGroups(Transformer):
def __init__(self):
Transformer.__init__(self, 245)
def process(mtree):
"""return the string split into explicit groups, that is, those either
between parenthese, square brackets or curly braces, and those separated
by a dash."""
for c in mtree.children:
groups = find_first_level_groups(c.value, group_delimiters[0])
for delimiters in group_delimiters:
flatten = lambda l, x: l + find_first_level_groups(x, delimiters)
groups = functools.reduce(flatten, groups, [])

def process(self, mtree, options=None):
"""split each of those into explicit groups (separated by parentheses or square brackets)
# do not do this at this moment, it is not strong enough and can break other
# patterns, such as dates, etc...
#groups = functools.reduce(lambda l, x: l + x.split('-'), groups, [])

:return: return the string split into explicit groups, that is, those either
between parenthese, square brackets or curly braces, and those separated
by a dash."""
for c in mtree.children:
groups = find_first_level_groups(c.value, group_delimiters[0])
for delimiters in group_delimiters:
flatten = lambda l, x: l + find_first_level_groups(x, delimiters)
groups = reduce(flatten, groups, [])

# do not do this at this moment, it is not strong enough and can break other
# patterns, such as dates, etc...
# groups = functools.reduce(lambda l, x: l + x.split('-'), groups, [])

c.split_on_components(groups)
c.split_on_components(groups)

@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by

@@ -18,30 +18,25 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.plugins.transformers import Transformer
from __future__ import unicode_literals
from guessit.patterns import sep
import re
import logging

log = logging.getLogger(__name__)


class SplitOnDash(Transformer):
def __init__(self):
Transformer.__init__(self, 190)
def process(mtree):
for node in mtree.unidentified_leaves():
indices = []

def process(self, mtree, options=None):
"""split into '-' separated subgroups (with required separator chars
around the dash)
"""
for node in mtree.unidentified_leaves():
indices = []
didx = 0
pattern = re.compile(sep + '-' + sep)
match = pattern.search(node.value)
while match:
span = match.span()
indices.extend([ span[0], span[1] ])
match = pattern.search(node.value, span[1])

pattern = re.compile(sep + '-' + sep)
match = pattern.search(node.value)
while match:
span = match.span()
indices.extend([span[0], span[1]])
match = pattern.search(node.value, span[1])

if indices:
node.partition(indices)
if indices:
node.partition(indices)

@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by

@@ -18,28 +18,19 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.plugins.transformers import Transformer
from __future__ import unicode_literals
from guessit import fileutils
from os.path import splitext
import os.path
import logging

log = logging.getLogger(__name__)


class SplitPathComponents(Transformer):
def __init__(self):
Transformer.__init__(self, 255)
def process(mtree):
"""Returns the filename split into [ dir*, basename, ext ]."""
components = fileutils.split_path(mtree.value)
basename = components.pop(-1)
components += list(os.path.splitext(basename))
components[-1] = components[-1][1:] # remove the '.' from the extension

def process(self, mtree, options=None):
"""first split our path into dirs + basename + ext

:return: the filename split into [ dir*, basename, ext ]
"""
if not options.get('name_only'):
components = fileutils.split_path(mtree.value)
basename = components.pop(-1)
components += list(splitext(basename))
components[-1] = components[-1][1:]  # remove the '.' from the extension

mtree.split_on_components(components)
else:
mtree.split_on_components([mtree.value, ''])
mtree.split_on_components(components)
