Switched out guessit libs for the one CP uses, which seems to have fewer dependencies

This commit is contained in:
echel0n 2014-04-21 23:07:04 -07:00
parent a6cd0f156b
commit 6fea9ddb40
65 changed files with 2034 additions and 7313 deletions

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GuessIt - A library for guessing information from filenames # GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com> # Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
# #
# GuessIt is free software; you can redistribute it and/or modify it under # GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by # the terms of the Lesser GNU General Public License as published by
@ -18,11 +18,9 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import unicode_literals
import pkg_resources
from .__version__ import __version__
__version__ = '0.6.2'
__all__ = ['Guess', 'Language', __all__ = ['Guess', 'Language',
'guess_file_info', 'guess_video_info', 'guess_file_info', 'guess_video_info',
'guess_movie_info', 'guess_episode_info'] 'guess_movie_info', 'guess_episode_info']
@ -32,69 +30,58 @@ __all__ = ['Guess', 'Language',
# it will then always be available # it will then always be available
# with code from http://lucumr.pocoo.org/2011/1/22/forwards-compatible-python/ # with code from http://lucumr.pocoo.org/2011/1/22/forwards-compatible-python/
import sys import sys
if sys.version_info[0] >= 3: # pragma: no cover if sys.version_info[0] >= 3:
PY2, PY3 = False, True PY3 = True
unicode_text_type = str unicode_text_type = str
native_text_type = str native_text_type = str
base_text_type = str base_text_type = str
def u(x): def u(x):
return str(x) return str(x)
def s(x): def s(x):
return x return x
class UnicodeMixin(object): class UnicodeMixin(object):
__str__ = lambda x: x.__unicode__() __str__ = lambda x: x.__unicode__()
import binascii import binascii
def to_hex(x): def to_hex(x):
return binascii.hexlify(x).decode('utf-8') return binascii.hexlify(x).decode('utf-8')
else: # pragma: no cover else:
PY2, PY3 = True, False PY3 = False
__all__ = [str(s) for s in __all__] # fix imports for python2 __all__ = [ str(s) for s in __all__ ] # fix imports for python2
unicode_text_type = unicode unicode_text_type = unicode
native_text_type = str native_text_type = str
base_text_type = basestring base_text_type = basestring
def u(x): def u(x):
if isinstance(x, str): if isinstance(x, str):
return x.decode('utf-8') return x.decode('utf-8')
if isinstance(x, list):
return [u(s) for s in x]
return unicode(x) return unicode(x)
def s(x): def s(x):
if isinstance(x, unicode): if isinstance(x, unicode):
return x.encode('utf-8') return x.encode('utf-8')
if isinstance(x, list): if isinstance(x, list):
return [s(y) for y in x] return [ s(y) for y in x ]
if isinstance(x, tuple): if isinstance(x, tuple):
return tuple(s(y) for y in x) return tuple(s(y) for y in x)
if isinstance(x, dict): if isinstance(x, dict):
return dict((s(key), s(value)) for key, value in x.items()) return dict((s(key), s(value)) for key, value in x.items())
return x return x
class UnicodeMixin(object): class UnicodeMixin(object):
__str__ = lambda x: unicode(x).encode('utf-8') __str__ = lambda x: unicode(x).encode('utf-8')
def to_hex(x): def to_hex(x):
return x.encode('hex') return x.encode('hex')
range = xrange
from guessit.guess import Guess, merge_all from guessit.guess import Guess, merge_all
from guessit.language import Language from guessit.language import Language
from guessit.matcher import IterativeMatcher from guessit.matcher import IterativeMatcher
from guessit.textutils import clean_string, is_camel, from_camel from guessit.textutils import clean_string
import os.path
import logging import logging
import json import json
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
class NullHandler(logging.Handler): class NullHandler(logging.Handler):
def emit(self, record): def emit(self, record):
pass pass
@ -104,74 +91,137 @@ h = NullHandler()
log.addHandler(h) log.addHandler(h)
def _guess_filename(filename, options=None, **kwargs): def _guess_filename(filename, filetype):
mtree = _build_filename_mtree(filename, options=options, **kwargs) def find_nodes(tree, props):
_add_camel_properties(mtree, options=options) """Yields all nodes containing any of the given props."""
return mtree.matched() if isinstance(props, base_text_type):
props = [props]
for node in tree.nodes():
if any(prop in node.guess for prop in props):
yield node
def warning(title):
log.warning('%s, guesses: %s - %s' % (title, m.nice_string(), m2.nice_string()))
return m
mtree = IterativeMatcher(filename, filetype=filetype)
m = mtree.matched()
second_pass_opts = []
second_pass_transfo_opts = {}
# if there are multiple possible years found, we assume the first one is
# part of the title, reparse the tree taking this into account
years = set(n.value for n in find_nodes(mtree.match_tree, 'year'))
if len(years) >= 2:
second_pass_opts.append('skip_first_year')
to_skip_language_nodes = []
title_nodes = set(n for n in find_nodes(mtree.match_tree, ['title', 'series']))
title_spans = {}
for title_node in title_nodes:
title_spans[title_node.span[0]] = title_node
title_spans[title_node.span[1]] = title_node
for lang_key in ('language', 'subtitleLanguage'):
langs = {}
lang_nodes = set(n for n in find_nodes(mtree.match_tree, lang_key))
for lang_node in lang_nodes:
lang = lang_node.guess.get(lang_key, None)
if len(lang_node.value) > 3 and (lang_node.span[0] in title_spans.keys() or lang_node.span[1] in title_spans.keys()):
# Language is next or before title, and is not a language code. Add to skip for 2nd pass.
# if filetype is subtitle and the language appears last, just before
# the extension, then it is likely a subtitle language
parts = clean_string(lang_node.root.value).split()
if m['type'] in ['moviesubtitle', 'episodesubtitle'] and (parts.index(lang_node.value) == len(parts) - 2):
continue
to_skip_language_nodes.append(lang_node)
elif not lang in langs:
langs[lang] = lang_node
else:
# The same language was found. Keep the more confident one, and add others to skip for 2nd pass.
existing_lang_node = langs[lang]
to_skip = None
if existing_lang_node.guess.confidence('language') >= lang_node.guess.confidence('language'):
# lang_node is to remove
to_skip = lang_node
else:
# existing_lang_node is to remove
langs[lang] = lang_node
to_skip = existing_lang_node
to_skip_language_nodes.append(to_skip)
def _build_filename_mtree(filename, options=None, **kwargs): if to_skip_language_nodes:
mtree = IterativeMatcher(filename, options=options, **kwargs) second_pass_transfo_opts['guess_language'] = (
second_pass_options = mtree.second_pass_options ((), { 'skip': [ { 'node_idx': node.parent.node_idx,
if second_pass_options: 'span': node.span }
log.info("Running 2nd pass") for node in to_skip_language_nodes ] }))
merged_options = dict(options)
merged_options.update(second_pass_options) if second_pass_opts or second_pass_transfo_opts:
mtree = IterativeMatcher(filename, options=merged_options, **kwargs) # 2nd pass is needed
return mtree log.info("Running 2nd pass with options: %s" % second_pass_opts)
log.info("Transfo options: %s" % second_pass_transfo_opts)
mtree = IterativeMatcher(filename, filetype=filetype,
opts=second_pass_opts,
transfo_opts=second_pass_transfo_opts)
m = mtree.matched()
if 'language' not in m and 'subtitleLanguage' not in m or 'title' not in m:
return m
# if we found some language, make sure we didn't cut a title or sth...
mtree2 = IterativeMatcher(filename, filetype=filetype,
opts=['nolanguage', 'nocountry'])
m2 = mtree2.matched()
if m.get('title') != m2.get('title'):
title = next(find_nodes(mtree.match_tree, 'title'))
title2 = next(find_nodes(mtree2.match_tree, 'title'))
# if a node is in an explicit group, then the correct title is probably
# the other one
if title.root.node_at(title.node_idx[:2]).is_explicit():
return m2
elif title2.root.node_at(title2.node_idx[:2]).is_explicit():
return m
return m
def _add_camel_properties(mtree, options=None, **kwargs): def guess_file_info(filename, filetype, info=None):
prop = 'title' if mtree.matched().get('type') != 'episode' else 'series'
value = mtree.matched().get(prop)
_guess_camel_string(mtree, value, options=options, skip_title=False, **kwargs)
for leaf in mtree.match_tree.unidentified_leaves():
value = leaf.value
_guess_camel_string(mtree, value, options=options, skip_title=True, **kwargs)
def _guess_camel_string(mtree, string, options=None, skip_title=False, **kwargs):
if string and is_camel(string):
log.info('"%s" is camel cased. Try to detect more properties.' % (string,))
uncameled_value = from_camel(string)
camel_tree = _build_filename_mtree(uncameled_value, options=options, name_only=True, skip_title=skip_title, **kwargs)
if len(camel_tree.matched()) > 0:
# Title has changed.
mtree.matched().update(camel_tree.matched())
return True
return False
def guess_file_info(filename, info=None, options=None, **kwargs):
"""info can contain the names of the various plugins, such as 'filename' to """info can contain the names of the various plugins, such as 'filename' to
detect filename info, or 'hash_md5' to get the md5 hash of the file. detect filename info, or 'hash_md5' to get the md5 hash of the file.
>>> testfile = os.path.join(os.path.dirname(__file__), 'test/dummy.srt') >>> guess_file_info('tests/dummy.srt', 'autodetect', info = ['hash_md5', 'hash_sha1'])
>>> g = guess_file_info(testfile, info = ['hash_md5', 'hash_sha1']) {'hash_md5': 'e781de9b94ba2753a8e2945b2c0a123d', 'hash_sha1': 'bfd18e2f4e5d59775c2bc14d80f56971891ed620'}
>>> g['hash_md5'], g['hash_sha1']
('64de6b5893cac24456c46a935ef9c359', 'a703fc0fa4518080505809bf562c6fc6f7b3c98c')
""" """
info = info or 'filename'
options = options or {}
result = [] result = []
hashers = [] hashers = []
# Force unicode as soon as possible # Force unicode as soon as possible
filename = u(filename) filename = u(filename)
if info is None:
info = ['filename']
if isinstance(info, base_text_type): if isinstance(info, base_text_type):
info = [info] info = [info]
for infotype in info: for infotype in info:
if infotype == 'filename': if infotype == 'filename':
result.append(_guess_filename(filename, options, **kwargs)) result.append(_guess_filename(filename, filetype))
elif infotype == 'hash_mpc': elif infotype == 'hash_mpc':
from guessit.hash_mpc import hash_file from guessit.hash_mpc import hash_file
try: try:
result.append(Guess({infotype: hash_file(filename)}, result.append(Guess({'hash_mpc': hash_file(filename)},
confidence=1.0)) confidence=1.0))
except Exception as e: except Exception as e:
log.warning('Could not compute MPC-style hash because: %s' % e) log.warning('Could not compute MPC-style hash because: %s' % e)
@ -179,7 +229,7 @@ def guess_file_info(filename, info=None, options=None, **kwargs):
elif infotype == 'hash_ed2k': elif infotype == 'hash_ed2k':
from guessit.hash_ed2k import hash_file from guessit.hash_ed2k import hash_file
try: try:
result.append(Guess({infotype: hash_file(filename)}, result.append(Guess({'hash_ed2k': hash_file(filename)},
confidence=1.0)) confidence=1.0))
except Exception as e: except Exception as e:
log.warning('Could not compute ed2k hash because: %s' % e) log.warning('Could not compute ed2k hash because: %s' % e)
@ -217,16 +267,23 @@ def guess_file_info(filename, info=None, options=None, **kwargs):
result = merge_all(result) result = merge_all(result)
# last minute adjustments
# if country is in the guessed properties, make it part of the filename
if 'series' in result and 'country' in result:
result['series'] += ' (%s)' % result['country'].alpha2.upper()
return result return result
def guess_video_info(filename, info=None, options=None, **kwargs): def guess_video_info(filename, info=None):
return guess_file_info(filename, info=info, options=options, type='video', **kwargs) return guess_file_info(filename, 'autodetect', info)
def guess_movie_info(filename, info=None, options=None, **kwargs): def guess_movie_info(filename, info=None):
return guess_file_info(filename, info=info, options=options, type='movie', **kwargs) return guess_file_info(filename, 'movie', info)
def guess_episode_info(filename, info=None, options=None, **kwargs): def guess_episode_info(filename, info=None):
return guess_file_info(filename, info=info, options=options, type='episode', **kwargs) return guess_file_info(filename, 'episode', info)

View file

@ -2,8 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GuessIt - A library for guessing information from filenames # GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com> # Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
# #
# GuessIt is free software; you can redistribute it and/or modify it under # GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by # the terms of the Lesser GNU General Public License as published by
@ -19,199 +18,109 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import unicode_literals
from __future__ import print_function
from guessit import u
from guessit import slogging, guess_file_info
from optparse import OptionParser
import logging import logging
import sys
import os import os
import locale
from guessit import PY2, u, guess_file_info
from guessit.options import option_parser
def guess_file(filename, info='filename', options=None, **kwargs): def detect_filename(filename, filetype, info=['filename'], advanced = False):
options = options or {}
filename = u(filename) filename = u(filename)
print('For:', filename) print('For:', filename)
guess = guess_file_info(filename, info, options, **kwargs) print('GuessIt found:', guess_file_info(filename, filetype, info).nice_string(advanced))
if options.get('yaml'):
try:
import yaml
for k, v in guess.items():
if isinstance(v, list) and len(v) == 1:
guess[k] = v[0]
ystr = yaml.safe_dump({filename: dict(guess)}, default_flow_style=False)
i = 0
for yline in ystr.splitlines():
if i == 0:
print("? " + yline[:-1])
elif i == 1:
print(":" + yline[1:])
else:
print(yline)
i = i + 1
return
except ImportError: # pragma: no cover
print('PyYAML not found. Using default output.')
print('GuessIt found:', guess.nice_string(options.get('advanced')))
def _supported_properties(): def run_demo(episodes=True, movies=True, advanced=False):
from guessit.plugins import transformers
all_properties = {}
transformers_properties = []
for transformer in transformers.all_transformers():
supported_properties = transformer.supported_properties()
transformers_properties.append((transformer, supported_properties))
if isinstance(supported_properties, dict):
for property_name, possible_values in supported_properties.items():
current_possible_values = all_properties.get(property_name)
if current_possible_values is None:
current_possible_values = []
all_properties[property_name] = current_possible_values
if possible_values:
current_possible_values.extend(possible_values)
else:
for property_name in supported_properties:
current_possible_values = all_properties.get(property_name)
if current_possible_values is None:
current_possible_values = []
all_properties[property_name] = current_possible_values
return (all_properties, transformers_properties)
def display_transformers():
print('GuessIt transformers:')
_, transformers_properties = _supported_properties()
for transformer, _ in transformers_properties:
print('[@] %s (%s)' % (transformer.name, transformer.priority))
def display_properties(values, transformers):
print('GuessIt properties:')
all_properties, transformers_properties = _supported_properties()
if transformers:
for transformer, properties_list in transformers_properties:
print('[@] %s (%s)' % (transformer.name, transformer.priority))
for property_name in properties_list:
property_values = all_properties.get(property_name)
print(' [+] %s' % (property_name,))
if property_values and values:
_display_property_values(property_name, indent=4)
else:
properties_list = []
properties_list.extend(all_properties.keys())
properties_list.sort()
for property_name in properties_list:
property_values = all_properties.get(property_name)
print(' [+] %s' % (property_name,))
if property_values and values:
_display_property_values(property_name, indent=4)
def _display_property_values(property_name, indent=2):
all_properties, _ = _supported_properties()
property_values = all_properties.get(property_name)
for property_value in property_values:
print(indent * ' ' + '[!] %s' % (property_value,))
def run_demo(episodes=True, movies=True, options=None):
# NOTE: tests should not be added here but rather in the tests/ folder # NOTE: tests should not be added here but rather in the tests/ folder
# this is just intended as a quick example # this is just intended as a quick example
if episodes: if episodes:
testeps = ['Series/Californication/Season 2/Californication.2x05.Vaginatown.HDTV.XviD-0TV.[tvu.org.ru].avi', testeps = [ 'Series/Californication/Season 2/Californication.2x05.Vaginatown.HDTV.XviD-0TV.[tvu.org.ru].avi',
'Series/dexter/Dexter.5x02.Hello,.Bandit.ENG.-.sub.FR.HDTV.XviD-AlFleNi-TeaM.[tvu.org.ru].avi', 'Series/dexter/Dexter.5x02.Hello,.Bandit.ENG.-.sub.FR.HDTV.XviD-AlFleNi-TeaM.[tvu.org.ru].avi',
'Series/Treme/Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.[tvu.org.ru].avi', 'Series/Treme/Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.[tvu.org.ru].avi',
'Series/Duckman/Duckman - 101 (01) - 20021107 - I, Duckman.avi', 'Series/Duckman/Duckman - 101 (01) - 20021107 - I, Duckman.avi',
'Series/Duckman/Duckman - S1E13 Joking The Chicken (unedited).avi', 'Series/Duckman/Duckman - S1E13 Joking The Chicken (unedited).avi',
'Series/Simpsons/The_simpsons_s13e18_-_i_am_furious_yellow.mpg', 'Series/Simpsons/The_simpsons_s13e18_-_i_am_furious_yellow.mpg',
'Series/Simpsons/Saison 12 Français/Simpsons,.The.12x08.A.Bas.Le.Sergent.Skinner.FR.[tvu.org.ru].avi', 'Series/Simpsons/Saison 12 Français/Simpsons,.The.12x08.A.Bas.Le.Sergent.Skinner.FR.[tvu.org.ru].avi',
'Series/Dr._Slump_-_002_DVB-Rip_Catalan_by_kelf.avi', 'Series/Dr._Slump_-_002_DVB-Rip_Catalan_by_kelf.avi',
'Series/Kaamelott/Kaamelott - Livre V - Second Volet - HD 704x396 Xvid 2 pass - Son 5.1 - TntRip by Slurm.avi' 'Series/Kaamelott/Kaamelott - Livre V - Second Volet - HD 704x396 Xvid 2 pass - Son 5.1 - TntRip by Slurm.avi'
] ]
for f in testeps: for f in testeps:
print('-' * 80) print('-'*80)
guess_file(f, options=options, type='episode') detect_filename(f, filetype='episode', advanced=advanced)
if movies: if movies:
testmovies = ['Movies/Fear and Loathing in Las Vegas (1998)/Fear.and.Loathing.in.Las.Vegas.720p.HDDVD.DTS.x264-ESiR.mkv', testmovies = [ 'Movies/Fear and Loathing in Las Vegas (1998)/Fear.and.Loathing.in.Las.Vegas.720p.HDDVD.DTS.x264-ESiR.mkv',
'Movies/El Dia de la Bestia (1995)/El.dia.de.la.bestia.DVDrip.Spanish.DivX.by.Artik[SEDG].avi', 'Movies/El Dia de la Bestia (1995)/El.dia.de.la.bestia.DVDrip.Spanish.DivX.by.Artik[SEDG].avi',
'Movies/Blade Runner (1982)/Blade.Runner.(1982).(Director\'s.Cut).CD1.DVDRip.XviD.AC3-WAF.avi', 'Movies/Blade Runner (1982)/Blade.Runner.(1982).(Director\'s.Cut).CD1.DVDRip.XviD.AC3-WAF.avi',
'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv', 'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv',
'Movies/Sin City (BluRay) (2005)/Sin.City.2005.BDRip.720p.x264.AC3-SEPTiC.mkv', 'Movies/Sin City (BluRay) (2005)/Sin.City.2005.BDRip.720p.x264.AC3-SEPTiC.mkv',
'Movies/Borat (2006)/Borat.(2006).R5.PROPER.REPACK.DVDRip.XviD-PUKKA.avi', # FIXME: PROPER and R5 get overwritten 'Movies/Borat (2006)/Borat.(2006).R5.PROPER.REPACK.DVDRip.XviD-PUKKA.avi', # FIXME: PROPER and R5 get overwritten
'[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv', # FIXME: title gets overwritten '[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv', # FIXME: title gets overwritten
'Battle Royale (2000)/Battle.Royale.(Batoru.Rowaiaru).(2000).(Special.Edition).CD1of2.DVDRiP.XviD-[ZeaL].avi', 'Battle Royale (2000)/Battle.Royale.(Batoru.Rowaiaru).(2000).(Special.Edition).CD1of2.DVDRiP.XviD-[ZeaL].avi',
'Movies/Brazil (1985)/Brazil_Criterion_Edition_(1985).CD2.English.srt', 'Movies/Brazil (1985)/Brazil_Criterion_Edition_(1985).CD2.English.srt',
'Movies/Persepolis (2007)/[XCT] Persepolis [H264+Aac-128(Fr-Eng)+ST(Fr-Eng)+Ind].mkv', 'Movies/Persepolis (2007)/[XCT] Persepolis [H264+Aac-128(Fr-Eng)+ST(Fr-Eng)+Ind].mkv',
'Movies/Toy Story (1995)/Toy Story [HDTV 720p English-Spanish].mkv', 'Movies/Toy Story (1995)/Toy Story [HDTV 720p English-Spanish].mkv',
'Movies/Pirates of the Caribbean: The Curse of the Black Pearl (2003)/Pirates.Of.The.Carribean.DC.2003.iNT.DVDRip.XviD.AC3-NDRT.CD1.avi', 'Movies/Pirates of the Caribbean: The Curse of the Black Pearl (2003)/Pirates.Of.The.Carribean.DC.2003.iNT.DVDRip.XviD.AC3-NDRT.CD1.avi',
'Movies/Office Space (1999)/Office.Space.[Dual-DVDRip].[Spanish-English].[XviD-AC3-AC3].[by.Oswald].avi', 'Movies/Office Space (1999)/Office.Space.[Dual-DVDRip].[Spanish-English].[XviD-AC3-AC3].[by.Oswald].avi',
'Movies/The NeverEnding Story (1984)/The.NeverEnding.Story.1.1984.DVDRip.AC3.Xvid-Monteque.avi', 'Movies/The NeverEnding Story (1984)/The.NeverEnding.Story.1.1984.DVDRip.AC3.Xvid-Monteque.avi',
'Movies/Juno (2007)/Juno KLAXXON.avi', 'Movies/Juno (2007)/Juno KLAXXON.avi',
'Movies/Chat noir, chat blanc (1998)/Chat noir, Chat blanc - Emir Kusturica (VO - VF - sub FR - Chapters).mkv', 'Movies/Chat noir, chat blanc (1998)/Chat noir, Chat blanc - Emir Kusturica (VO - VF - sub FR - Chapters).mkv',
'Movies/Wild Zero (2000)/Wild.Zero.DVDivX-EPiC.srt', 'Movies/Wild Zero (2000)/Wild.Zero.DVDivX-EPiC.srt',
'Movies/El Bosque Animado (1987)/El.Bosque.Animado.[Jose.Luis.Cuerda.1987].[Xvid-Dvdrip-720x432].avi', 'Movies/El Bosque Animado (1987)/El.Bosque.Animado.[Jose.Luis.Cuerda.1987].[Xvid-Dvdrip-720x432].avi',
'testsmewt_bugs/movies/Baraka_Edition_Collector.avi' 'testsmewt_bugs/movies/Baraka_Edition_Collector.avi'
] ]
for f in testmovies: for f in testmovies:
print('-' * 80) print('-'*80)
guess_file(f, options=options, type='movie') detect_filename(f, filetype = 'movie', advanced = advanced)
def main(args=None, setup_logging=True): def main():
if setup_logging: slogging.setupLogging()
from guessit import slogging
slogging.setupLogging()
if PY2: # pragma: no cover # see http://bugs.python.org/issue2128
import codecs if sys.version_info.major < 3 and os.name == 'nt':
import locale for i, a in enumerate(sys.argv):
import sys sys.argv[i] = a.decode(locale.getpreferredencoding())
parser = OptionParser(usage = 'usage: %prog [options] file1 [file2...]')
parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
help = 'display debug output')
parser.add_option('-i', '--info', dest = 'info', default = 'filename',
help = 'the desired information type: filename, hash_mpc or a hash from python\'s '
'hashlib module, such as hash_md5, hash_sha1, ...; or a list of any of '
'them, comma-separated')
parser.add_option('-t', '--type', dest = 'filetype', default = 'autodetect',
help = 'the suggested file type: movie, episode or autodetect')
parser.add_option('-a', '--advanced', dest = 'advanced', action='store_true', default = False,
help = 'display advanced information for filename guesses, as json output')
parser.add_option('-d', '--demo', action='store_true', dest='demo', default=False,
help = 'run a few builtin tests instead of analyzing a file')
# see http://bugs.python.org/issue2128 options, args = parser.parse_args()
if os.name == 'nt':
for i, a in enumerate(sys.argv):
sys.argv[i] = a.decode(locale.getpreferredencoding())
# see https://github.com/wackou/guessit/issues/43
# and http://stackoverflow.com/questions/4545661/unicodedecodeerror-when-redirecting-to-file
# Wrap sys.stdout into a StreamWriter to allow writing unicode.
sys.stdout = codecs.getwriter(locale.getpreferredencoding())(sys.stdout)
if args:
options, args = option_parser.parse_args(args)
else: # pragma: no cover
options, args = option_parser.parse_args()
if options.verbose: if options.verbose:
logging.getLogger().setLevel(logging.DEBUG) logging.getLogger('guessit').setLevel(logging.DEBUG)
help_required = True
if options.properties or options.values:
display_properties(options.values, options.transformers)
help_required = False
elif options.transformers:
display_transformers()
help_required = False
if options.demo: if options.demo:
run_demo(episodes=True, movies=True, options=vars(options)) run_demo(episodes=True, movies=True, advanced=options.advanced)
help_required = False
else: else:
if args: if args:
help_required = False
for filename in args: for filename in args:
guess_file(filename, detect_filename(filename,
info=options.info.split(','), filetype = options.filetype,
options=vars(options) info = options.info.split(','),
) advanced = options.advanced)
if help_required: # pragma: no cover else:
option_parser.print_help() parser.print_help()
if __name__ == '__main__': if __name__ == '__main__':
main() main()

View file

@ -1,20 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
__version__ = '0.7.1'

View file

@ -1,615 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from .patterns import compile_pattern, sep
from . import base_text_type
from .guess import Guess
import types
def _get_span(prop, match):
    """Return the ``(start, end)`` span to use for a regexp match.

    :param prop: property configuration; only its ``global_span`` flag is read
    :param match: regexp match object
    :return: the whole match span when ``prop.global_span`` is true or the
        pattern has no capturing group; otherwise the smallest span covering
        every capturing group that took part in the match
    :rtype: tuple
    """
    if prop.global_span or not match.re.groups:
        return match.span()

    # A group that did not participate in the match reports (-1, -1); letting
    # it into the min/max below would drag the start of the span down to -1.
    spans = [match.span(i) for i in range(1, match.re.groups + 1)]
    spans = [span for span in spans if span != (-1, -1)]
    if not spans:
        # No capturing group matched at all: fall back to the whole match.
        return match.span()

    return (min(start for start, _ in spans), max(end for _, end in spans))
def _get_groups(compiled_re):
    """
    List the capturing groups of a compiled regexp, in pattern order.

    :param compiled_re: compiled regular expression
    :return: one entry per capturing group -- its name when the group is
        named, its 1-based index otherwise; ``[None]`` when the pattern has
        no capturing group
    """
    group_count = compiled_re.groups
    if not group_count:
        return [None]

    # groupindex maps name -> index; invert it to look names up by index
    name_by_index = dict((index, name)
                         for name, index in compiled_re.groupindex.items())
    return [name_by_index.get(index, index)
            for index in range(1, group_count + 1)]
class NoValidator(object):
    """Validator that accepts every match unconditionally."""

    def validate(self, prop, string, node, match, entry_start, entry_end):
        """Return ``True`` whatever the arguments are."""
        return True
class DefaultValidator(object):
    """Make sure our match is surrounded by separators, or by another entry"""

    def validate(self, prop, string, node, match, entry_start, entry_end):
        """Tell whether the match is cleanly delimited on both sides.

        A side is delimited when the span touches the corresponding edge of
        ``string``, when the neighbouring character is in ``sep``, or when
        the span boundary coincides with the boundary of another entry.

        :param prop: property configuration, forwarded to ``_get_span``
        :param string: the string the match was found in
        :param node: unused by this validator
        :param match: regexp match object
        :param entry_start: container of start offsets of other entries
        :param entry_end: container of end offsets of other entries
        :return: ``True`` when both sides are delimited, ``False`` otherwise
        """
        begin, stop = _get_span(prop, match)

        left_sep = begin <= 0 or string[begin - 1] in sep
        right_sep = stop >= len(string) or string[stop] in sep

        # touching another entry is as good as touching a separator
        follows_entry = begin in entry_end
        precedes_entry = stop in entry_start

        left_ok = left_sep or follows_entry
        right_ok = right_sep or precedes_entry
        return left_ok and right_ok
class WeakValidator(DefaultValidator):
    """Make sure our match is surrounded by separators and is the first or last element in the string"""

    def validate(self, prop, string, node, match, entry_start, entry_end):
        """Accept a match only when it sits at the start or end of ``string``.

        The parent validation must pass first.  The match is then accepted
        when everything before the whole-match span consists of separators,
        or when everything after it does.  The character located exactly at
        the end offset of the span is not examined by this second check.

        :return: ``True`` when accepted, ``False`` otherwise
        """
        parent = super(WeakValidator, self)
        if not parent.validate(prop, string, node, match, entry_start, entry_end):
            return False

        first, last = match.span()

        # nothing but separators in front of the match
        if all(char in sep for char in string[:first]):
            return True

        # nothing but separators behind the match
        return all(char in sep for char in string[last + 1:])
class LeavesValidator(DefaultValidator):
    """Make sure our match is surrounded by separators and validates defined lambdas"""

    def __init__(self, lambdas=None, previous_lambdas=None, next_lambdas=None, both_side=False, default_=True):
        """
        :param lambdas: rules applied to both the previous and the next leaves
        :param previous_lambdas: rules applied to the previous leaves only
        :param next_lambdas: rules applied to the next leaves only
        :param both_side: selects how the two sides are combined, see
            :meth:`validate`
        :param default_: when true, the :class:`DefaultValidator` check must
            pass before any rule is evaluated
        """
        # Work on private copies: the lists are extended below and must not
        # leak the shared rules back into the caller's own lists (nor be
        # extended twice when the same list is given for both sides).
        self.previous_lambdas = list(previous_lambdas) if previous_lambdas is not None else []
        self.next_lambdas = list(next_lambdas) if next_lambdas is not None else []
        if lambdas:
            self.previous_lambdas.extend(lambdas)
            self.next_lambdas.extend(lambdas)
        self.both_side = both_side
        self.default_ = default_

    def validate(self, prop, string, node, match, entry_start, entry_end):
        """Run the default check (if enabled), then the per-side rules.

        :return: ``False`` when the default check fails; the default result
            when neither side has any rule; otherwise the combination of the
            two side results (``and`` when ``both_side`` is set, ``or``
            otherwise)
        """
        if self.default_:
            super_ret = super(LeavesValidator, self).validate(prop, string, node, match, entry_start, entry_end)
        else:
            super_ret = True
        if not super_ret:
            return False

        previous_ = self._validate_previous(prop, string, node, match, entry_start, entry_end)
        # NOTE(review): with both_side set, a truthy previous result returns
        # here without evaluating the next side -- the condition looks
        # inverted; kept as is, confirm the intent before changing it.
        if previous_ and self.both_side:
            return previous_
        next_ = self._validate_next(prop, string, node, match, entry_start, entry_end)

        if previous_ is None and next_ is None:
            return super_ret
        if self.both_side:
            return previous_ and next_
        else:
            return previous_ or next_

    def _validate_previous(self, prop, string, node, match, entry_start, entry_end):
        """Apply the previous-side rules to the leaves preceding ``node``.

        :return: ``None`` when there is no previous-side rule; otherwise the
            first rule result that is not ``None``, or ``False`` when every
            rule returned ``None`` for every leaf
        """
        if self.previous_lambdas:
            for leaf in node.root.previous_leaves(node):
                for lambda_ in self.previous_lambdas:
                    ret = self._check_rule(lambda_, leaf)
                    if ret is not None:
                        return ret
            return False

    def _validate_next(self, prop, string, node, match, entry_start, entry_end):
        """Apply the next-side rules to the leaves following ``node``.

        :return: ``None`` when there is no next-side rule; otherwise the
            first rule result that is not ``None``, or ``False`` when every
            rule returned ``None`` for every leaf
        """
        if self.next_lambdas:
            for leaf in node.root.next_leaves(node):
                for lambda_ in self.next_lambdas:
                    ret = self._check_rule(lambda_, leaf)
                    if ret is not None:
                        return ret
            return False

    def _check_rule(self, lambda_, previous_leaf):
        """Evaluate a single rule against a single leaf."""
        return lambda_(previous_leaf)
class _Property:
"""Represents a property configuration."""
def __init__(self, keys=None, pattern=None, canonical_form=None, canonical_from_pattern=True, confidence=1.0, enhance=True, global_span=False, validator=DefaultValidator(), formatter=None):
"""
:param keys: Keys of the property (format, screenSize, ...)
:type keys: string
:param canonical_form: Unique value of the property (DVD, 720p, ...)
:type canonical_form: string
:param pattern: Regexp pattern
:type pattern: string
:param confidence: confidence
:type confidence: float
:param enhance: enhance the pattern
:type enhance: boolean
:param global_span: if True, the whole match span will used to create the Guess.
Else, the span from the capturing groups will be used.
:type global_span: boolean
:param validator: Validator to use
:type validator: :class:`DefaultValidator`
:param formatter: Formater to use
:type formatter: function
"""
if isinstance(keys, list):
self.keys = keys
elif isinstance(keys, base_text_type):
self.keys = [keys]
else:
self.keys = []
self.canonical_form = canonical_form
if not pattern is None:
self.pattern = pattern
else:
self.pattern = canonical_form
if self.canonical_form is None and canonical_from_pattern:
self.canonical_form = self.pattern
self.compiled = compile_pattern(self.pattern, enhance=enhance)
for group_name in _get_groups(self.compiled):
if isinstance(group_name, base_text_type) and not group_name in self.keys:
self.keys.append(group_name)
if not self.keys:
raise ValueError("No property key is defined")
self.confidence = confidence
self.global_span = global_span
self.validator = validator
self.formatter = formatter
def format(self, value, group_name=None):
"""Retrieves the final value from re group match value"""
formatter = None
if isinstance(self.formatter, dict):
formatter = self.formatter.get(group_name)
if formatter is None and not group_name is None:
formatter = self.formatter.get(None)
else:
formatter = self.formatter
if isinstance(formatter, types.FunctionType):
return formatter(value)
elif not formatter is None:
return formatter.format(value)
return value
def __repr__(self):
return "%s: %s" % (self.keys, self.canonical_form if self.canonical_form else self.pattern)
class PropertiesContainer(object):
    """Registry of :class:`_Property` definitions that can be searched in strings.

    Keyword arguments given to the constructor are the default parameters of
    every property created by :meth:`register_property`.
    """

    def __init__(self, **kwargs):
        self._properties = []
        self.default_property_kwargs = kwargs

    def unregister_property(self, name, *canonical_forms):
        """Unregister a property canonical forms

        If canonical_forms are specified, only those values will be unregistered

        :param name: Property name to unregister
        :type name: string
        :param canonical_forms: Values to unregister
        :type canonical_forms: varargs of string
        """
        def _is_unregistered(prop):
            return name in prop.keys and (not canonical_forms or prop.canonical_form in canonical_forms)

        self._properties[:] = [prop for prop in self._properties if not _is_unregistered(prop)]

    def register_property(self, name, *patterns, **property_params):
        """Register property with defined canonical form and patterns.

        :param name: name of the property (format, screenSize, ...)
        :type name: string
        :param patterns: regular expression patterns to register for the property canonical_form.
            A dict may be given in place of a pattern: it is then used as extra
            keyword arguments for the created property.
        :type patterns: varargs of string
        :param property_params: parameters of the created properties, overriding the container defaults
        :return: the created properties
        :rtype: list of :class:`_Property`
        """
        properties = []
        for pattern in patterns:
            params = dict(self.default_property_kwargs)
            params.update(property_params)
            if isinstance(pattern, dict):
                params.update(pattern)
                prop = _Property(name, **params)
            else:
                prop = _Property(name, pattern, **params)
            self._properties.append(prop)
            properties.append(prop)
        return properties

    def register_canonical_properties(self, name, *canonical_forms, **property_params):
        """Register properties from their canonical forms.

        :param name: name of the property (releaseGroup, ...)
        :type name: string
        :param canonical_forms: values of the property ('ESiR', 'WAF', 'SEPTiC', ...)
        :type canonical_forms: varargs of strings
        :return: the created properties
        :rtype: list of :class:`_Property`
        """
        properties = []
        for canonical_form in canonical_forms:
            params = dict(property_params)
            # Explicit, so that it does not depend on canonical_from_pattern
            params['canonical_form'] = canonical_form
            properties.extend(self.register_property(name, canonical_form, **params))
        return properties

    def unregister_all_properties(self):
        """Unregister all defined properties"""
        # list.clear() does not exist on Python 2
        del self._properties[:]

    def find_properties(self, string, node, name=None, validate=True, re_match=False, sort=True, multiple=False):
        """Find all distinct properties for given string

        If no capturing group is defined in the property, value will be grabbed from the entire match.

        If one or more unnamed capturing group is defined in the property, first capturing group will be used.

        If named capturing group are defined in the property, they will be returned as property key.

        If validate, found properties will be validated by their defined validator

        If re_match, re.match will be used instead of re.search.

        if sort, found properties will be sorted from longer match to shorter match.

        If multiple is False and multiple values are found for the same property, the more confident one will be returned.

        If multiple is False and multiple values are found for the same property and the same confidence, the longer will be returned.

        :param string: input string
        :type string: string
        :param node: current node of the matching tree
        :type node: :class:`guessit.matchtree.MatchTree`
        :param name: name of property to find
        :type name: string
        :param validate: drop the matches rejected by their property validator
        :type validate: bool
        :param re_match: use re.match instead of re.search
        :type re_match: bool
        :param sort: sort the result from longer match to shorter match
        :type sort: bool
        :param multiple: Allows multiple property values to be returned
        :type multiple: bool

        :return: found properties
        :rtype: list of tuples (:class:`_Property`, match)

        :see: `_Property`
        :see: `register_property`
        :see: `register_canonical_properties`
        """
        if not string.strip():
            return []

        # search all properties
        entries = []
        for prop in self.get_properties(name):
            match = prop.compiled.match(string) if re_match else prop.compiled.search(string)
            if match:
                entries.append((prop, match))

        if validate:
            self._remove_invalid_entries(entries, string, node)

        ret = entries if multiple else self._best_entries(entries)

        if sort:
            # start - end is the negated length: longer matches come first
            ret.sort(key=lambda entry: entry[1].start() - entry[1].end())
        return ret

    def _remove_invalid_entries(self, entries, string, node):
        """Remove in place the entries rejected by their property validator."""
        # properties indexed by the start and by the end of their span; given
        # to the validators and kept in sync with the remaining entries
        entry_start = {}
        entry_end = {}
        for prop, match in entries:
            start, end = _get_span(prop, match)
            entry_start.setdefault(start, []).append(prop)
            entry_end.setdefault(end, []).append(prop)

        # Removing an entry may invalidate another one: loop until stable
        while True:
            invalid_entries = [(prop, match) for prop, match in entries
                               if not prop.validator.validate(prop, string, node, match, entry_start, entry_end)]
            if not invalid_entries:
                break
            for entry in invalid_entries:
                prop, match = entry
                entries.remove(entry)
                start, end = _get_span(prop, match)
                entry_start[start].remove(prop)
                if not entry_start.get(start):
                    del entry_start[start]
                entry_end[end].remove(prop)
                if not entry_end.get(end):
                    del entry_end[end]

    @staticmethod
    def _best_entries(entries):
        """Return, for each property key, the best entry among those sharing the key.

        The most confident property wins; on equal confidence the longest
        match wins; on a full tie the first entry is kept. An entry selected
        for several keys is returned once.
        """
        def _rank(entry):
            prop, match = entry
            start, end = match.span()
            return (prop.confidence, end - start)

        key_order = []
        entries_by_key = {}
        for entry in entries:
            for key in entry[0].keys:
                if key not in entries_by_key:
                    entries_by_key[key] = []
                    key_order.append(key)
                entries_by_key[key].append(entry)

        best_entries = []
        for key in key_order:
            best = None
            for candidate in entries_by_key[key]:
                if best is None or _rank(best) < _rank(candidate):
                    best = candidate
            if not any(best is selected for selected in best_entries):
                best_entries.append(best)
        return best_entries

    def as_guess(self, found_properties, input=None, filter=None, sep_replacement=None, multiple=False, *args, **kwargs):
        """Build guesses from the properties found by :meth:`find_properties`.

        :param found_properties: tuples (:class:`_Property`, match)
        :param input: string in which the properties were found
        :type input: string
        :param filter: callable receiving a guess, returning False to discard it.
            Every guess is kept when not given.
        :param sep_replacement: if given, replaces the separator characters in the values
        :type sep_replacement: string
        :param multiple: return all the kept guesses instead of the first one
        :type multiple: bool

        :return: if multiple, the list of kept guesses; else the first kept
            guess, or None if there is none
        """
        if filter is None:
            filter = lambda guess, *args, **kwargs: True
        guesses = [] if multiple else None

        for prop, match in found_properties:
            # First property key will be used as base for effective name
            property_name = None
            for key in prop.keys:
                if isinstance(key, base_text_type):
                    property_name = key if key else None
                    break

            span = _get_span(prop, match)
            guess = Guess(confidence=prop.confidence, input=input, span=span, prop=property_name)

            groups = _get_groups(match.re)
            for group_name in groups:
                if isinstance(group_name, base_text_type):
                    name = group_name
                elif property_name not in groups:
                    name = property_name
                else:
                    name = None
                if not name:
                    continue

                value_span = match.span(group_name) if group_name else match.span()
                value = self._effective_prop_value(prop, group_name, input, value_span, sep_replacement)
                if value is None:
                    continue
                if isinstance(value, base_text_type) and not value:
                    # Keep non empty strings and other defined objects
                    continue

                if isinstance(value, dict):
                    for k, v in value.items():
                        if k is None:
                            k = name
                        guess[k] = v
                else:
                    guess[name] = value
                if group_name:
                    # NOTE(review): metadata is indexed here by the property
                    # object, not by the value name - confirm this is intended
                    guess.metadata(prop).span = match.span(group_name)

            if filter(guess):
                if multiple:
                    guesses.append(guess)
                else:
                    return guess
        return guesses

    def _effective_prop_value(self, prop, group_name, input=None, span=None, sep_replacement=None):
        """Return the value of a found property.

        This is the canonical form of the property if it has one; else the
        part of input delimited by span (the whole input if span is None),
        with separators replaced by sep_replacement if given, and formatted
        by the property. None is returned if there is no input.
        """
        if prop.canonical_form:
            return prop.canonical_form
        if not input:
            return None
        value = input if span is None else input[span[0]:span[1]]
        if sep_replacement:
            for sep_char in sep:
                value = value.replace(sep_char, sep_replacement)
        if value:
            value = prop.format(value, group_name)
        return value

    def get_properties(self, name=None, canonical_form=None):
        """Retrieve properties

        :param name: if given, only properties having this key are retrieved
        :param canonical_form: if given, only properties having this canonical form are retrieved

        :return: Properties
        :rtype: generator
        """
        for prop in self._properties:
            if (name is None or name in prop.keys) and (canonical_form is None or prop.canonical_form == canonical_form):
                yield prop

    def get_supported_properties(self):
        """Retrieve the canonical forms registered for each property key.

        :return: the set of canonical forms, indexed by property key
        :rtype: dict
        """
        supported_properties = {}
        for prop in self.get_properties():
            for k in prop.keys:
                values = supported_properties.setdefault(k, set())
                if prop.canonical_form:
                    values.add(prop.canonical_form)
        return supported_properties
class QualitiesContainer(object):
    """Registry of quality ratings, indexed by property name and canonical form."""

    def __init__(self):
        self._qualities = {}

    def register_quality(self, name, canonical_form, rating):
        """Register a quality rating.

        :param name: Name of the property
        :type name: string
        :param canonical_form: Value of the property
        :type canonical_form: string
        :param rating: Estimated quality rating for the property
        :type rating: int
        """
        self._qualities.setdefault(name, {})[canonical_form] = rating

    def unregister_quality(self, name, *canonical_forms):
        """Unregister quality ratings for given property name.

        If canonical_forms are specified, only those values will be unregistered

        :param name: Name of the property
        :type name: string
        :param canonical_forms: Value of the property
        :type canonical_forms: string
        """
        if not canonical_forms:
            self._qualities.pop(name, None)
            return

        property_qualities = self._qualities.get(name)
        if property_qualities is None:
            return
        for canonical_form in canonical_forms:
            property_qualities.pop(canonical_form, None)
        if not property_qualities:
            # Do not keep a property without any rating
            del self._qualities[name]

    def clear_qualities(self):
        """Unregister all defined quality ratings."""
        self._qualities.clear()

    def rate_quality(self, guess, *props):
        """Rate the quality of guess.

        A property holding a list of values is rated with the sum of the
        ratings of its values. Values without registered rating count for 0.

        :param guess: Guess to rate
        :type guess: :class:`guessit.guess.Guess`
        :param props: Properties to include in the rating. if empty, rating will be performed for all guess properties.
        :type props: varargs of string

        :return: Quality of the guess. The higher, the better.
        :rtype: int
        """
        rate = 0
        if not props:
            props = guess.keys()
        for prop in props:
            prop_value = guess.get(prop)
            prop_qualities = self._qualities.get(prop)
            if prop_value is None or prop_qualities is None:
                continue
            values = prop_value if isinstance(prop_value, list) else [prop_value]
            for value in values:
                try:
                    rating = prop_qualities.get(value, 0)
                except TypeError:
                    # Unhashable value: it can not have a registered rating
                    rating = 0
                rate += rating
        return rate

    def _best_rated(self, guesses, props):
        """Return the first guess having the highest rating, None without guess."""
        best_guess = None
        best_rate = None
        for guess in guesses:
            rate = self.rate_quality(guess, *props)
            if best_rate is None or best_rate < rate:
                best_rate = rate
                best_guess = guess
        return best_guess

    def best_quality_properties(self, props, *guesses):
        """Retrieve the best quality guess, based on given properties

        :param props: Properties to include in the rating
        :type props: list of strings
        :param guesses: Guesses to rate
        :type guesses: :class:`guessit.guess.Guess`

        :return: Best quality guess from all passed guesses
        :rtype: :class:`guessit.guess.Guess`
        """
        return self._best_rated(guesses, props)

    def best_quality(self, *guesses):
        """Retrieve the best quality guess.

        :param guesses: Guesses to rate
        :type guesses: :class:`guessit.guess.Guess`

        :return: Best quality guess from all passed guesses
        :rtype: :class:`guessit.guess.Guess`
        """
        return self._best_rated(guesses, ())

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GuessIt - A library for guessing information from filenames # GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com> # Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# #
# GuessIt is free software; you can redistribute it and/or modify it under # GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by # the terms of the Lesser GNU General Public License as published by
@ -18,13 +18,12 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import unicode_literals
from guessit import UnicodeMixin, base_text_type, u from guessit import UnicodeMixin, base_text_type, u
from guessit.fileutils import load_file_in_same_dir from guessit.fileutils import load_file_in_same_dir
import logging import logging
__all__ = ['Country'] __all__ = [ 'Country' ]
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@ -37,12 +36,12 @@ log = logging.getLogger(__name__)
# are all separated by pipe (|) characters." # are all separated by pipe (|) characters."
_iso3166_contents = load_file_in_same_dir(__file__, 'ISO-3166-1_utf8.txt') _iso3166_contents = load_file_in_same_dir(__file__, 'ISO-3166-1_utf8.txt')
country_matrix = [l.strip().split('|') country_matrix = [ l.strip().split('|')
for l in _iso3166_contents.strip().split('\n')] for l in _iso3166_contents.strip().split('\n') ]
country_matrix += [['Unknown', 'un', 'unk', '', ''], country_matrix += [ [ 'Unknown', 'un', 'unk', '', '' ],
['Latin America', '', 'lat', '', ''] [ 'Latin America', '', 'lat', '', '' ]
] ]
country_to_alpha3 = dict((c[0].lower(), c[2].lower()) for c in country_matrix) country_to_alpha3 = dict((c[0].lower(), c[2].lower()) for c in country_matrix)
country_to_alpha3.update(dict((c[1].lower(), c[2].lower()) for c in country_matrix)) country_to_alpha3.update(dict((c[1].lower(), c[2].lower()) for c in country_matrix))
@ -50,16 +49,17 @@ country_to_alpha3.update(dict((c[2].lower(), c[2].lower()) for c in country_matr
# add here exceptions / non ISO representations # add here exceptions / non ISO representations
# Note: remember to put those exceptions in lower-case, they won't work otherwise # Note: remember to put those exceptions in lower-case, they won't work otherwise
country_to_alpha3.update({'latinoamérica': 'lat', country_to_alpha3.update({ 'latinoamérica': 'lat',
'brazilian': 'bra', 'brazilian': 'bra',
'españa': 'esp', 'españa': 'esp',
'uk': 'gbr' 'uk': 'gbr'
}) })
country_alpha3_to_en_name = dict((c[2].lower(), c[0]) for c in country_matrix) country_alpha3_to_en_name = dict((c[2].lower(), c[0]) for c in country_matrix)
country_alpha3_to_alpha2 = dict((c[2].lower(), c[1].lower()) for c in country_matrix) country_alpha3_to_alpha2 = dict((c[2].lower(), c[1].lower()) for c in country_matrix)
class Country(UnicodeMixin): class Country(UnicodeMixin):
"""This class represents a country. """This class represents a country.
@ -78,6 +78,7 @@ class Country(UnicodeMixin):
if self.alpha3 is None: if self.alpha3 is None:
self.alpha3 = 'unk' self.alpha3 = 'unk'
@property @property
def alpha2(self): def alpha2(self):
return country_alpha3_to_alpha2[self.alpha3] return country_alpha3_to_alpha2[self.alpha3]

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GuessIt - A library for guessing information from filenames # GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com> # Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
# #
# GuessIt is free software; you can redistribute it and/or modify it under # GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by # the terms of the Lesser GNU General Public License as published by
@ -18,55 +18,15 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import unicode_literals
import datetime import datetime
import re import re
import math
# Character class matching a single date separator: dash, slash, space or dot
_dsep = r'[-/ \.]'
# Compiled date patterns. Each one exposes the named groups 'year', 'month'
# and 'day', and requires a non-digit character right before and right after
# the date.
_date_rexps = [re.compile(
# 20010823 (4-digit year, month, day, without separator)
r'[^0-9]' +
r'(?P<year>[0-9]{4})' +
r'(?P<month>[0-9]{2})' +
r'(?P<day>[0-9]{2})' +
r'[^0-9]'),
# 2001-08-23 (4-digit year first)
re.compile(r'[^0-9]' +
r'(?P<year>[0-9]{4})' + _dsep +
r'(?P<month>[0-9]{2})' + _dsep +
r'(?P<day>[0-9]{2})' +
r'[^0-9]'),
# 23-08-2001 (day first, 4-digit year)
re.compile(r'[^0-9]' +
r'(?P<day>[0-9]{2})' + _dsep +
r'(?P<month>[0-9]{2})' + _dsep +
r'(?P<year>[0-9]{4})' +
r'[^0-9]'),
# 23-08-01 (day first, 2-digit year)
re.compile(r'[^0-9]' +
r'(?P<day>[0-9]{2})' + _dsep +
r'(?P<month>[0-9]{2})' + _dsep +
r'(?P<year>[0-9]{2})' +
r'[^0-9]'),
]
def valid_year(year, today=None):
    """Tell whether year lies strictly between 1920 and 5 years after today.

    today defaults to the current date.
    """
    reference = today if today else datetime.date.today()
    upper_bound = reference.year + 5
    return 1920 < year and year < upper_bound
def valid_year(year):
    """Tell whether year lies strictly between 1920 and 5 years after the current year."""
    upper_bound = datetime.date.today().year + 5
    return 1920 < year and year < upper_bound
def search_year(string): def search_year(string):
"""Looks for year patterns, and if found return the year and group span. """Looks for year patterns, and if found return the year and group span.
Assumes there are sentinels at the beginning and end of the string that Assumes there are sentinels at the beginning and end of the string that
always allow matching a non-digit delimiting the date. always allow matching a non-digit delimiting the date.
@ -74,10 +34,10 @@ def search_year(string):
and now + 5 years, so for instance 2000 would be returned as a valid and now + 5 years, so for instance 2000 would be returned as a valid
year but 1492 would not. year but 1492 would not.
>>> search_year(' in the year 2000... ') >>> search_year('in the year 2000...')
(2000, (13, 17)) (2000, (12, 16))
>>> search_year(' they arrived in 1492. ') >>> search_year('they arrived in 1492.')
(None, None) (None, None)
""" """
match = re.search(r'[^0-9]([0-9]{4})[^0-9]', string) match = re.search(r'[^0-9]([0-9]{4})[^0-9]', string)
@ -91,32 +51,59 @@ def search_year(string):
def search_date(string): def search_date(string):
"""Looks for date patterns, and if found return the date and group span. """Looks for date patterns, and if found return the date and group span.
Assumes there are sentinels at the beginning and end of the string that Assumes there are sentinels at the beginning and end of the string that
always allow matching a non-digit delimiting the date. always allow matching a non-digit delimiting the date.
Year can be defined on two digit only. It will return the nearest possible >>> search_date('This happened on 2002-04-22.')
date from today. (datetime.date(2002, 4, 22), (17, 27))
>>> search_date(' This happened on 2002-04-22. ') >>> search_date('And this on 17-06-1998.')
(datetime.date(2002, 4, 22), (18, 28)) (datetime.date(1998, 6, 17), (12, 22))
>>> search_date(' And this on 17-06-1998. ') >>> search_date('no date in here')
(datetime.date(1998, 6, 17), (13, 23))
>>> search_date(' no date in here ')
(None, None) (None, None)
""" """
today = datetime.date.today() dsep = r'[-/ \.]'
for drexp in _date_rexps:
date_rexps = [
# 20010823
r'[^0-9]' +
r'(?P<year>[0-9]{4})' +
r'(?P<month>[0-9]{2})' +
r'(?P<day>[0-9]{2})' +
r'[^0-9]',
# 2001-08-23
r'[^0-9]' +
r'(?P<year>[0-9]{4})' + dsep +
r'(?P<month>[0-9]{2})' + dsep +
r'(?P<day>[0-9]{2})' +
r'[^0-9]',
# 23-08-2001
r'[^0-9]' +
r'(?P<day>[0-9]{2})' + dsep +
r'(?P<month>[0-9]{2})' + dsep +
r'(?P<year>[0-9]{4})' +
r'[^0-9]',
# 23-08-01
r'[^0-9]' +
r'(?P<day>[0-9]{2})' + dsep +
r'(?P<month>[0-9]{2})' + dsep +
r'(?P<year>[0-9]{2})' +
r'[^0-9]',
]
for drexp in date_rexps:
match = re.search(drexp, string) match = re.search(drexp, string)
if match: if match:
d = match.groupdict() d = match.groupdict()
year, month, day = int(d['year']), int(d['month']), int(d['day']) year, month, day = int(d['year']), int(d['month']), int(d['day'])
# years specified as 2 digits should be adjusted here # years specified as 2 digits should be adjusted here
if year < 100: if year < 100:
if year > (today.year % 100) + 5: if year > (datetime.date.today().year % 100) + 5:
year = 1900 + year year = 1900 + year
else: else:
year = 2000 + year year = 2000 + year
@ -134,7 +121,7 @@ def search_date(string):
continue continue
# check date plausibility # check date plausibility
if not valid_year(date.year, today=today): if not 1900 < date.year < datetime.date.today().year + 5:
continue continue
# looks like we have a valid date # looks like we have a valid date

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GuessIt - A library for guessing information from filenames # GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com> # Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
# #
# GuessIt is free software; you can redistribute it and/or modify it under # GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by # the terms of the Lesser GNU General Public License as published by
@ -18,8 +18,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import unicode_literals
from guessit import s, u from guessit import s, u
import os.path import os.path
import zipfile import zipfile
@ -45,13 +44,17 @@ def split_path(path):
result = [] result = []
while True: while True:
head, tail = os.path.split(path) head, tail = os.path.split(path)
headlen = len(head)
if not head and not tail: # on Unix systems, the root folder is '/'
return result if head and head == '/'*headlen and tail == '':
return ['/'] + result
if not tail and head == path: # on Windows, the root folder is a drive letter (eg: 'C:\') or for shares \\
# Make sure we won't have an infinite loop. if ((headlen == 3 and head[1:] == ':\\') or (headlen == 2 and head == '\\\\')) and tail == '':
result = [head] + result return [head] + result
if head == '' and tail == '':
return result return result
# we just split a directory ending with '/', so tail is empty # we just split a directory ending with '/', so tail is empty
@ -67,8 +70,8 @@ def split_path(path):
def file_in_same_dir(ref_file, desired_file): def file_in_same_dir(ref_file, desired_file):
"""Return the path for a file in the same dir as a given reference file. """Return the path for a file in the same dir as a given reference file.
>>> s(file_in_same_dir('~/smewt/smewt.db', 'smewt.settings')) == os.path.normpath('~/smewt/smewt.settings') >>> s(file_in_same_dir('~/smewt/smewt.db', 'smewt.settings'))
True '~/smewt/smewt.settings'
""" """
return os.path.join(*(split_path(ref_file)[:-1] + [desired_file])) return os.path.join(*(split_path(ref_file)[:-1] + [desired_file]))

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GuessIt - A library for guessing information from filenames # GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com> # Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
# #
# GuessIt is free software; you can redistribute it and/or modify it under # GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by # the terms of the Lesser GNU General Public License as published by
@ -18,9 +18,10 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import unicode_literals
from guessit import UnicodeMixin, s, u, base_text_type from guessit import UnicodeMixin, s, u, base_text_type
from guessit.language import Language
from guessit.country import Country
import json import json
import datetime import datetime
import logging import logging
@ -28,103 +29,6 @@ import logging
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
class GuessMetadata(object):
    """Confidence, input string, span and property attached to a guess.

    Any value left undefined (None) on this object is looked up on the
    parent metadata, if there is one.

    :param parent: The parent metadata, used for undefined properties in self object
    :type parent: :class:`GuessMetadata`
    :param confidence: The confidence (from 0.0 to 1.0)
    :type confidence: number
    :param input: The input string
    :type input: string
    :param span: The span of the input string
    :type span: tuple (int, int)
    :param prop: The found property definition
    :type prop: :class:`guessit.containers._Property`
    """

    def __init__(self, parent=None, confidence=None, input=None, span=None, prop=None, *args, **kwargs):
        self.parent = parent
        # Without parent to inherit from, an undefined confidence means 1.0
        has_no_source = confidence is None and self.parent is None
        self._confidence = 1.0 if has_no_source else confidence
        self._input = input
        self._span = span
        self._prop = prop

    @property
    def confidence(self):
        """The confidence

        :rtype: number
        :return: own confidence, else the one of the parent, else None
        """
        if self._confidence is not None:
            return self._confidence
        if self.parent:
            return self.parent.confidence
        return None

    @confidence.setter
    def confidence(self, confidence):
        self._confidence = confidence

    @property
    def input(self):
        """The input

        :rtype: string
        :return: String used to find this guess value
        """
        if self._input is not None:
            return self._input
        if self.parent:
            return self.parent.input
        return None

    @property
    def span(self):
        """The span

        :rtype: tuple (int, int)
        :return: span of input string used to find this guess value
        """
        if self._span is not None:
            return self._span
        if self.parent:
            return self.parent.span
        return None

    @span.setter
    def span(self, span):
        """Define the span of input string used to find this guess value."""
        self._span = span

    @property
    def prop(self):
        """The property

        :rtype: :class:`_Property`
        :return: The property
        """
        if self._prop is not None:
            return self._prop
        if self.parent:
            return self.parent.prop
        return None

    @property
    def raw(self):
        """Return the part of the input string delimited by the span (the
        original match, not the cleaned version), or None if either of them
        is missing."""
        text = self.input
        bounds = self.span
        if not (text and bounds):
            return None
        return text[bounds[0]:bounds[1]]

    def __repr__(self, *args, **kwargs):
        return object.__repr__(self, *args, **kwargs)
def _split_kwargs(**kwargs):
    """Split keyword arguments between metadata arguments and the others.

    An argument is a metadata argument when its name is an attribute name
    of the GuessMetadata class.

    :return: tuple (metadata arguments, remaining arguments)
    """
    metadata_args = {}
    for attribute in dir(GuessMetadata):
        if attribute in kwargs:
            metadata_args[attribute] = kwargs.pop(attribute)
    return metadata_args, kwargs
class Guess(UnicodeMixin, dict): class Guess(UnicodeMixin, dict):
"""A Guess is a dictionary which has an associated confidence for each of """A Guess is a dictionary which has an associated confidence for each of
its values. its values.
@ -133,98 +37,91 @@ class Guess(UnicodeMixin, dict):
simple dict.""" simple dict."""
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
metadata_kwargs, kwargs = _split_kwargs(**kwargs) try:
self._global_metadata = GuessMetadata(**metadata_kwargs) confidence = kwargs.pop('confidence')
except KeyError:
confidence = 0
try:
raw = kwargs.pop('raw')
except KeyError:
raw = None
dict.__init__(self, *args, **kwargs) dict.__init__(self, *args, **kwargs)
self._metadata = {} self._confidence = {}
self._raw = {}
for prop in self: for prop in self:
self._metadata[prop] = GuessMetadata(parent=self._global_metadata) self._confidence[prop] = confidence
self._raw[prop] = raw
def to_dict(self, advanced=False): def to_dict(self, advanced=False):
"""Return the guess as a dict containing only base types, ie:
where dates, languages, countries, etc. are converted to strings.
if advanced is True, return the data as a json string containing
also the raw information of the properties."""
data = dict(self) data = dict(self)
for prop, value in data.items(): for prop, value in data.items():
if isinstance(value, datetime.date): if isinstance(value, datetime.date):
data[prop] = value.isoformat() data[prop] = value.isoformat()
elif isinstance(value, (UnicodeMixin, base_text_type)): elif isinstance(value, (Language, Country, base_text_type)):
data[prop] = u(value) data[prop] = u(value)
elif isinstance(value, list): elif isinstance(value, list):
data[prop] = [u(x) for x in value] data[prop] = [u(x) for x in value]
if advanced: if advanced:
metadata = self.metadata(prop) data[prop] = {"value": data[prop], "raw": self.raw(prop), "confidence": self.confidence(prop)}
prop_data = {'value': data[prop]}
if metadata.raw:
prop_data['raw'] = metadata.raw
if metadata.confidence:
prop_data['confidence'] = metadata.confidence
data[prop] = prop_data
return data return data
def nice_string(self, advanced=False): def nice_string(self, advanced=False):
"""Return a string with the property names and their values,
that also displays the associated confidence to each property.
FIXME: doc with param"""
if advanced: if advanced:
data = self.to_dict(advanced) data = self.to_dict(advanced)
return json.dumps(data, indent=4) return json.dumps(data, indent=4)
else: else:
data = self.to_dict() data = self.to_dict()
parts = json.dumps(data, indent=4).split('\n') parts = json.dumps(data, indent=4).split('\n')
for i, p in enumerate(parts): for i, p in enumerate(parts):
if p[:5] != ' "': if p[:5] != ' "':
continue continue
prop = p.split('"')[1] prop = p.split('"')[1]
parts[i] = (' [%.2f] "' % self.confidence(prop)) + p[5:] parts[i] = (' [%.2f] "' % self.confidence(prop)) + p[5:]
return '\n'.join(parts) return '\n'.join(parts)
def __unicode__(self): def __unicode__(self):
return u(self.to_dict()) return u(self.to_dict())
def metadata(self, prop=None): def confidence(self, prop):
"""Return the metadata associated with the given property name return self._confidence.get(prop, -1)
If no property name is given, get the global_metadata
"""
if prop is None:
return self._global_metadata
if not prop in self._metadata:
self._metadata[prop] = GuessMetadata(parent=self._global_metadata)
return self._metadata[prop]
def confidence(self, prop=None):
return self.metadata(prop).confidence
def set_confidence(self, prop, confidence):
self.metadata(prop).confidence = confidence
def raw(self, prop): def raw(self, prop):
return self.metadata(prop).raw return self._raw.get(prop, None)
def set(self, prop_name, value, *args, **kwargs): def set(self, prop, value, confidence=None, raw=None):
self[prop_name] = value self[prop] = value
self._metadata[prop_name] = GuessMetadata(parent=self._global_metadata, *args, **kwargs) if confidence is not None:
self._confidence[prop] = confidence
if raw is not None:
self._raw[prop] = raw
def update(self, other, confidence=None): def set_confidence(self, prop, value):
self._confidence[prop] = value
def set_raw(self, prop, value):
self._raw[prop] = value
def update(self, other, confidence=None, raw=None):
dict.update(self, other) dict.update(self, other)
if isinstance(other, Guess): if isinstance(other, Guess):
for prop in other: for prop in other:
try: self._confidence[prop] = other.confidence(prop)
self._metadata[prop] = other._metadata[prop] self._raw[prop] = other.raw(prop)
except KeyError:
pass if confidence is not None:
if not confidence is None:
for prop in other: for prop in other:
self.set_confidence(prop, confidence) self._confidence[prop] = confidence
if raw is not None:
for prop in other:
self._raw[prop] = raw
def update_highest_confidence(self, other): def update_highest_confidence(self, other):
"""Update this guess with the values from the given one. In case """Update this guess with the values from the given one. In case
@ -234,16 +131,17 @@ class Guess(UnicodeMixin, dict):
raise ValueError('Can only call this function on Guess instances') raise ValueError('Can only call this function on Guess instances')
for prop in other: for prop in other:
if prop in self and self.metadata(prop).confidence >= other.metadata(prop).confidence: if prop in self and self.confidence(prop) >= other.confidence(prop):
continue continue
self[prop] = other[prop] self[prop] = other[prop]
self._metadata[prop] = other.metadata(prop) self._confidence[prop] = other.confidence(prop)
self._raw[prop] = other.raw(prop)
def choose_int(g1, g2): def choose_int(g1, g2):
"""Function used by merge_similar_guesses to choose between 2 possible """Function used by merge_similar_guesses to choose between 2 possible
properties when they are integers.""" properties when they are integers."""
v1, c1 = g1 # value, confidence v1, c1 = g1 # value, confidence
v2, c2 = g2 v2, c2 = g2
if (v1 == v2): if (v1 == v2):
return (v1, 1 - (1 - c1) * (1 - c2)) return (v1, 1 - (1 - c1) * (1 - c2))
@ -281,7 +179,7 @@ def choose_string(g1, g2):
('The Simpsons', 0.75) ('The Simpsons', 0.75)
""" """
v1, c1 = g1 # value, confidence v1, c1 = g1 # value, confidence
v2, c2 = g2 v2, c2 = g2
if not v1: if not v1:
@ -388,48 +286,43 @@ def merge_all(guesses, append=None):
instead of being merged. instead of being merged.
>>> s(merge_all([ Guess({'season': 2}, confidence=0.6), >>> s(merge_all([ Guess({'season': 2}, confidence=0.6),
... Guess({'episodeNumber': 13}, confidence=0.8) ]) ... Guess({'episodeNumber': 13}, confidence=0.8) ]))
... ) == {'season': 2, 'episodeNumber': 13} {'season': 2, 'episodeNumber': 13}
True
>>> s(merge_all([ Guess({'episodeNumber': 27}, confidence=0.02), >>> s(merge_all([ Guess({'episodeNumber': 27}, confidence=0.02),
... Guess({'season': 1}, confidence=0.2) ]) ... Guess({'season': 1}, confidence=0.2) ]))
... ) == {'season': 1} {'season': 1}
True
>>> s(merge_all([ Guess({'other': 'PROPER'}, confidence=0.8), >>> s(merge_all([ Guess({'other': 'PROPER'}, confidence=0.8),
... Guess({'releaseGroup': '2HD'}, confidence=0.8) ], ... Guess({'releaseGroup': '2HD'}, confidence=0.8) ],
... append=['other']) ... append=['other']))
... ) == {'releaseGroup': '2HD', 'other': ['PROPER']} {'releaseGroup': '2HD', 'other': ['PROPER']}
True
""" """
result = Guess()
if not guesses: if not guesses:
return result return Guess()
result = guesses[0]
if append is None: if append is None:
append = [] append = []
for g in guesses: for g in guesses[1:]:
# first append our appendable properties # first append our appendable properties
for prop in append: for prop in append:
if prop in g: if prop in g:
result.set(prop, result.get(prop, []) + [g[prop]], result.set(prop, result.get(prop, []) + [g[prop]],
# TODO: what to do with confidence here? maybe an # TODO: what to do with confidence here? maybe an
# arithmetic mean... # arithmetic mean...
confidence=g.metadata(prop).confidence, confidence=g.confidence(prop),
input=g.metadata(prop).input, raw=g.raw(prop))
span=g.metadata(prop).span,
prop=g.metadata(prop).prop)
del g[prop] del g[prop]
# then merge the remaining ones # then merge the remaining ones
dups = set(result) & set(g) dups = set(result) & set(g)
if dups: if dups:
log.warning('duplicate properties %s in merged result...' % [(result[p], g[p]) for p in dups]) log.warning('duplicate properties %s in merged result...' % [ (result[p], g[p]) for p in dups] )
result.update_highest_confidence(g) result.update_highest_confidence(g)
@ -445,7 +338,7 @@ def merge_all(guesses, append=None):
if isinstance(value, list): if isinstance(value, list):
result[prop] = list(set(value)) result[prop] = list(set(value))
else: else:
result[prop] = [value] result[prop] = [ value ]
except KeyError: except KeyError:
pass pass

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GuessIt - A library for guessing information from filenames # GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com> # Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
# #
# GuessIt is free software; you can redistribute it and/or modify it under # GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by # the terms of the Lesser GNU General Public License as published by
@ -18,8 +18,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import unicode_literals
from guessit import s, to_hex from guessit import s, to_hex
import hashlib import hashlib
import os.path import os.path
@ -28,9 +27,8 @@ import os.path
def hash_file(filename): def hash_file(filename):
"""Returns the ed2k hash of a given file. """Returns the ed2k hash of a given file.
>>> testfile = os.path.join(os.path.dirname(__file__), 'test/dummy.srt') >>> s(hash_file('tests/dummy.srt'))
>>> s(hash_file(testfile)) 'ed2k://|file|dummy.srt|44|1CA0B9DED3473B926AA93A0A546138BB|/'
'ed2k://|file|dummy.srt|59|41F58B913AB3973F593BEBA8B8DF6510|/'
""" """
return 'ed2k://|file|%s|%d|%s|/' % (os.path.basename(filename), return 'ed2k://|file|%s|%d|%s|/' % (os.path.basename(filename),
os.path.getsize(filename), os.path.getsize(filename),

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GuessIt - A library for guessing information from filenames # GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com> # Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
# #
# GuessIt is free software; you can redistribute it and/or modify it under # GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by # the terms of the Lesser GNU General Public License as published by
@ -18,8 +18,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import unicode_literals
import struct import struct
import os import os
@ -29,7 +28,7 @@ def hash_file(filename):
http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes
and is licensed under the GPL.""" and is licensed under the GPL."""
longlongformat = b'q' # long long longlongformat = 'q' # long long
bytesize = struct.calcsize(longlongformat) bytesize = struct.calcsize(longlongformat)
f = open(filename, "rb") f = open(filename, "rb")
@ -40,14 +39,14 @@ def hash_file(filename):
if filesize < 65536 * 2: if filesize < 65536 * 2:
raise Exception("SizeError: size is %d, should be > 132K..." % filesize) raise Exception("SizeError: size is %d, should be > 132K..." % filesize)
for x in range(int(65536 / bytesize)): for x in range(65536 / bytesize):
buf = f.read(bytesize) buf = f.read(bytesize)
(l_value,) = struct.unpack(longlongformat, buf) (l_value,) = struct.unpack(longlongformat, buf)
hash_value += l_value hash_value += l_value
hash_value = hash_value & 0xFFFFFFFFFFFFFFFF # to remain as 64bit number hash_value = hash_value & 0xFFFFFFFFFFFFFFFF #to remain as 64bit number
f.seek(max(0, filesize - 65536), 0) f.seek(max(0, filesize - 65536), 0)
for x in range(int(65536 / bytesize)): for x in range(65536 / bytesize):
buf = f.read(bytesize) buf = f.read(bytesize)
(l_value,) = struct.unpack(longlongformat, buf) (l_value,) = struct.unpack(longlongformat, buf)
hash_value += l_value hash_value += l_value

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GuessIt - A library for guessing information from filenames # GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com> # Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
# #
# GuessIt is free software; you can redistribute it and/or modify it under # GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by # the terms of the Lesser GNU General Public License as published by
@ -18,143 +18,122 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import unicode_literals
from guessit import UnicodeMixin, base_text_type, u, s
from guessit import UnicodeMixin, base_text_type, u from guessit.fileutils import load_file_in_same_dir
from guessit.textutils import find_words from guessit.textutils import find_words
from babelfish import Language from guessit.country import Country
import babelfish
import re import re
import logging import logging
from guessit.guess import Guess
__all__ = ['Language', 'UNDETERMINED', __all__ = [ 'is_iso_language', 'is_language', 'lang_set', 'Language',
'search_language', 'guess_language'] 'ALL_LANGUAGES', 'ALL_LANGUAGES_NAMES', 'UNDETERMINED',
'search_language', 'guess_language' ]
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
UNDETERMINED = babelfish.Language('und')
SYN = {('und', None): ['unknown', 'inconnu', 'unk', 'un'], # downloaded from http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
('ell', None): ['gr', 'greek'], #
('spa', None): ['esp', 'español'], # Description of the fields:
('fra', None): ['français', 'vf', 'vff', 'vfi'], # "An alpha-3 (bibliographic) code, an alpha-3 (terminologic) code (when given),
('swe', None): ['se'], # an alpha-2 code (when given), an English name, and a French name of a language
('por', 'BR'): ['po', 'pb', 'pob', 'br', 'brazilian'], # are all separated by pipe (|) characters."
('cat', None): ['català'], _iso639_contents = load_file_in_same_dir(__file__, 'ISO-639-2_utf-8.txt')
('ces', None): ['cz'],
('ukr', None): ['ua'], # drop the BOM from the beginning of the file
('zho', None): ['cn'], _iso639_contents = _iso639_contents[1:]
('jpn', None): ['jp'],
('hrv', None): ['scr'], language_matrix = [ l.strip().split('|')
('mul', None): ['multi', 'dl'], # http://scenelingo.wordpress.com/2009/03/24/what-does-dl-mean/ for l in _iso639_contents.strip().split('\n') ]
}
class GuessitConverter(babelfish.LanguageReverseConverter): # update information in the language matrix
language_matrix += [['mol', '', 'mo', 'Moldavian', 'moldave'],
['ass', '', '', 'Assyrian', 'assyrien']]
_with_country_regexp = re.compile('(.*)\((.*)\)') for lang in language_matrix:
_with_country_regexp2 = re.compile('(.*)-(.*)') # remove unused languages that shadow other common ones with a non-official form
if (lang[2] == 'se' or # Northern Sami shadows Swedish
def __init__(self): lang[2] == 'br'): # Breton shadows Brazilian
self.guessit_exceptions = {} lang[2] = ''
for (alpha3, country), synlist in SYN.items(): # add missing information
for syn in synlist: if lang[0] == 'und':
self.guessit_exceptions[syn.lower()] = (alpha3, country, None) lang[2] = 'un'
if lang[0] == 'srp':
@property lang[1] = 'scc' # from OpenSubtitles
def codes(self):
return (babelfish.language_converters['alpha3b'].codes |
babelfish.language_converters['alpha2'].codes |
babelfish.language_converters['name'].codes |
babelfish.language_converters['opensubtitles'].codes |
babelfish.country_converters['name'].codes |
frozenset(self.guessit_exceptions.keys()))
def convert(self, alpha3, country=None, script=None):
return str(babelfish.Language(alpha3, country, script))
def reverse(self, name):
with_country = (GuessitConverter._with_country_regexp.match(name) or
GuessitConverter._with_country_regexp2.match(name))
if with_country:
lang = babelfish.Language.fromguessit(with_country.group(1).strip())
lang.country = babelfish.Country.fromguessit(with_country.group(2).strip())
return (lang.alpha3, lang.country.alpha2 if lang.country else None, lang.script or None)
# exceptions come first, as they need to override a potential match
# with any of the other guessers
try:
return self.guessit_exceptions[name.lower()]
except KeyError:
pass
for conv in [babelfish.Language,
babelfish.Language.fromalpha3b,
babelfish.Language.fromalpha2,
babelfish.Language.fromname,
babelfish.Language.fromopensubtitles]:
try:
c = conv(name)
return c.alpha3, c.country, c.script
except (ValueError, babelfish.LanguageReverseError):
pass
raise babelfish.LanguageReverseError(name)
babelfish.language_converters['guessit'] = GuessitConverter() lng3 = frozenset(l[0] for l in language_matrix if l[0])
lng3term = frozenset(l[1] for l in language_matrix if l[1])
lng2 = frozenset(l[2] for l in language_matrix if l[2])
lng_en_name = frozenset(lng for l in language_matrix
for lng in l[3].lower().split('; ') if lng)
lng_fr_name = frozenset(lng for l in language_matrix
for lng in l[4].lower().split('; ') if lng)
lng_all_names = lng3 | lng3term | lng2 | lng_en_name | lng_fr_name
COUNTRIES_SYN = {'ES': ['españa'], lng3_to_lng3term = dict((l[0], l[1]) for l in language_matrix if l[1])
'GB': ['UK'], lng3term_to_lng3 = dict((l[1], l[0]) for l in language_matrix if l[1])
'BR': ['brazilian', 'bra'],
# FIXME: this one is a bit of a stretch, not sure how to do lng3_to_lng2 = dict((l[0], l[2]) for l in language_matrix if l[2])
# it properly, though... lng2_to_lng3 = dict((l[2], l[0]) for l in language_matrix if l[2])
'MX': ['Latinoamérica', 'latin america']
} # we only return the first given english name, hoping it is the most used one
lng3_to_lng_en_name = dict((l[0], l[3].split('; ')[0])
for l in language_matrix if l[3])
lng_en_name_to_lng3 = dict((en_name.lower(), l[0])
for l in language_matrix if l[3]
for en_name in l[3].split('; '))
# we only return the first given french name, hoping it is the most used one
lng3_to_lng_fr_name = dict((l[0], l[4].split('; ')[0])
for l in language_matrix if l[4])
lng_fr_name_to_lng3 = dict((fr_name.lower(), l[0])
for l in language_matrix if l[4]
for fr_name in l[4].split('; '))
# contains a list of exceptions: strings that should be parsed as a language
# but which are not in an ISO form
lng_exceptions = { 'unknown': ('und', None),
'inconnu': ('und', None),
'unk': ('und', None),
'un': ('und', None),
'gr': ('gre', None),
'greek': ('gre', None),
'esp': ('spa', None),
'español': ('spa', None),
'se': ('swe', None),
'po': ('pt', 'br'),
'pb': ('pt', 'br'),
'pob': ('pt', 'br'),
'br': ('pt', 'br'),
'brazilian': ('pt', 'br'),
'català': ('cat', None),
'cz': ('cze', None),
'ua': ('ukr', None),
'cn': ('chi', None),
'chs': ('chi', None),
'jp': ('jpn', None),
'scr': ('hrv', None)
}
class GuessitCountryConverter(babelfish.CountryReverseConverter): def is_iso_language(language):
def __init__(self): return language.lower() in lng_all_names
self.guessit_exceptions = {}
for alpha2, synlist in COUNTRIES_SYN.items(): def is_language(language):
for syn in synlist: return is_iso_language(language) or language in lng_exceptions
self.guessit_exceptions[syn.lower()] = alpha2
@property def lang_set(languages, strict=False):
def codes(self): """Return a set of guessit.Language created from their given string
return (babelfish.country_converters['name'].codes | representation.
frozenset(babelfish.COUNTRIES.values()) |
frozenset(self.guessit_exceptions.keys()))
def convert(self, alpha2): if strict is True, then this will raise an exception if any language
return str(babelfish.Country(alpha2)) could not be identified.
"""
def reverse(self, name): return set(Language(l, strict=strict) for l in languages)
# exceptions come first, as they need to override a potential match
# with any of the other guessers
try:
return self.guessit_exceptions[name.lower()]
except KeyError:
pass
try:
return babelfish.Country(name.upper()).alpha2
except ValueError:
pass
for conv in [babelfish.Country.fromname]:
try:
return conv(name).alpha2
except babelfish.CountryReverseError:
pass
raise babelfish.CountryReverseError(name)
babelfish.country_converters['guessit'] = GuessitCountryConverter()
class Language(UnicodeMixin): class Language(UnicodeMixin):
@ -174,65 +153,109 @@ class Language(UnicodeMixin):
>>> Language('fr') >>> Language('fr')
Language(French) Language(French)
>>> (Language('eng').english_name) == 'English' >>> s(Language('eng').french_name)
'anglais'
>>> s(Language('pt(br)').country.english_name)
'Brazil'
>>> s(Language('Español (Latinoamérica)').country.english_name)
'Latin America'
>>> Language('Spanish (Latin America)') == Language('Español (Latinoamérica)')
True True
>>> (Language('pt(br)').country.name) == 'BRAZIL' >>> s(Language('zz', strict=False).english_name)
True 'Undetermined'
>>> (Language('zz', strict=False).english_name) == 'Undetermined' >>> s(Language('pt(br)').opensubtitles)
True 'pob'
>>> (Language('pt(br)').opensubtitles) == 'pob'
True
""" """
def __init__(self, language, country=None, strict=False): _with_country_regexp = re.compile('(.*)\((.*)\)')
_with_country_regexp2 = re.compile('(.*)-(.*)')
def __init__(self, language, country=None, strict=False, scheme=None):
language = u(language.strip().lower()) language = u(language.strip().lower())
country = babelfish.Country(country.upper()) if country else None with_country = (Language._with_country_regexp.match(language) or
Language._with_country_regexp2.match(language))
if with_country:
self.lang = Language(with_country.group(1)).lang
self.country = Country(with_country.group(2))
return
try: self.lang = None
self.lang = babelfish.Language.fromguessit(language) self.country = Country(country) if country else None
# user given country overrides guessed one
if country:
self.lang.country = country
except babelfish.LanguageReverseError: # first look for scheme specific languages
msg = 'The given string "%s" could not be identified as a language' % language if scheme == 'opensubtitles':
if strict: if language == 'br':
raise ValueError(msg) self.lang = 'bre'
return
elif language == 'se':
self.lang = 'sme'
return
elif scheme is not None:
log.warning('Unrecognized scheme: "%s" - Proceeding with standard one' % scheme)
# look for ISO language codes
if len(language) == 2:
self.lang = lng2_to_lng3.get(language)
elif len(language) == 3:
self.lang = (language
if language in lng3
else lng3term_to_lng3.get(language))
else:
self.lang = (lng_en_name_to_lng3.get(language) or
lng_fr_name_to_lng3.get(language))
# general language exceptions
if self.lang is None and language in lng_exceptions:
lang, country = lng_exceptions[language]
self.lang = Language(lang).alpha3
self.country = Country(country) if country else None
msg = 'The given string "%s" could not be identified as a language' % language
if self.lang is None and strict:
raise ValueError(msg)
if self.lang is None:
log.debug(msg) log.debug(msg)
self.lang = UNDETERMINED self.lang = 'und'
@property
def country(self):
return self.lang.country
@property @property
def alpha2(self): def alpha2(self):
return self.lang.alpha2 return lng3_to_lng2[self.lang]
@property @property
def alpha3(self): def alpha3(self):
return self.lang.alpha3 return self.lang
@property @property
def alpha3term(self): def alpha3term(self):
return self.lang.alpha3b return lng3_to_lng3term[self.lang]
@property @property
def english_name(self): def english_name(self):
return self.lang.name return lng3_to_lng_en_name[self.lang]
@property
def french_name(self):
return lng3_to_lng_fr_name[self.lang]
@property @property
def opensubtitles(self): def opensubtitles(self):
return self.lang.opensubtitles if self.lang == 'por' and self.country and self.country.alpha2 == 'br':
return 'pob'
elif self.lang in ['gre', 'srp']:
return self.alpha3term
return self.alpha3
@property @property
def tmdb(self): def tmdb(self):
if self.country: if self.country:
return '%s-%s' % (self.alpha2, self.country.alpha2) return '%s-%s' % (self.alpha2, self.country.alpha2.upper())
return self.alpha2 return self.alpha2
def __hash__(self): def __hash__(self):
@ -240,8 +263,7 @@ class Language(UnicodeMixin):
def __eq__(self, other): def __eq__(self, other):
if isinstance(other, Language): if isinstance(other, Language):
# in Guessit, languages are considered equal if their main languages are equal return self.lang == other.lang
return self.alpha3 == other.alpha3
if isinstance(other, base_text_type): if isinstance(other, base_text_type):
try: try:
@ -254,138 +276,115 @@ class Language(UnicodeMixin):
def __ne__(self, other): def __ne__(self, other):
return not self == other return not self == other
def __bool__(self): def __nonzero__(self):
return self.lang != UNDETERMINED return self.lang != 'und'
__nonzero__ = __bool__
def __unicode__(self): def __unicode__(self):
if self.lang.country: if self.country:
return '%s(%s)' % (self.english_name, self.country.alpha2) return '%s(%s)' % (self.english_name, self.country.alpha2)
else: else:
return self.english_name return self.english_name
def __repr__(self): def __repr__(self):
if self.lang.country: if self.country:
return 'Language(%s, country=%s)' % (self.english_name, self.lang.country) return 'Language(%s, country=%s)' % (self.english_name, self.country)
else: else:
return 'Language(%s)' % self.english_name return 'Language(%s)' % self.english_name
# list of common words which could be interpreted as languages, but which UNDETERMINED = Language('und')
# are far too common to be able to say they represent a language in the ALL_LANGUAGES = frozenset(Language(lng) for lng in lng_all_names) - frozenset([UNDETERMINED])
# middle of a string (where they most likely carry their commmon meaning) ALL_LANGUAGES_NAMES = lng_all_names
LNG_COMMON_WORDS = frozenset([
# english words
'is', 'it', 'am', 'mad', 'men', 'man', 'run', 'sin', 'st', 'to',
'no', 'non', 'war', 'min', 'new', 'car', 'day', 'bad', 'bat', 'fan',
'fry', 'cop', 'zen', 'gay', 'fat', 'one', 'cherokee', 'got', 'an', 'as',
'cat', 'her', 'be', 'hat', 'sun', 'may', 'my', 'mr', 'rum', 'pi',
# french words
'bas', 'de', 'le', 'son', 'ne', 'ca', 'ce', 'et', 'que',
'mal', 'est', 'vol', 'or', 'mon', 'se',
# spanish words
'la', 'el', 'del', 'por', 'mar',
# other
'ind', 'arw', 'ts', 'ii', 'bin', 'chan', 'ss', 'san', 'oss', 'iii',
'vi', 'ben', 'da', 'lt', 'ch',
# new from babelfish
'mkv', 'avi', 'dmd', 'the', 'dis', 'cut', 'stv', 'des', 'dia', 'and',
'cab', 'sub', 'mia', 'rim', 'las', 'une', 'par', 'srt', 'ano', 'toy',
'job', 'gag', 'reel', 'www', 'for', 'ayu', 'csi', 'ren', 'moi', 'sur',
'fer', 'fun', 'two', 'big', 'psy', 'air',
# release groups
'bs' # Bosnian
])
def search_language(string, lang_filter=None, skip=None):
subtitle_prefixes = ['sub', 'subs', 'st', 'vost', 'subforced', 'fansub', 'hardsub']
subtitle_suffixes = ['subforced', 'fansub', 'hardsub']
lang_prefixes = ['true']
def find_possible_languages(string):
"""Find possible languages in the string
:return: list of tuple (property, Language, lang_word, word)
"""
words = find_words(string)
valid_words = []
for word in words:
lang_word = word.lower()
key = 'language'
for prefix in subtitle_prefixes:
if lang_word.startswith(prefix):
lang_word = lang_word[len(prefix):]
key = 'subtitleLanguage'
for suffix in subtitle_suffixes:
if lang_word.endswith(suffix):
lang_word = lang_word[:len(suffix)]
key = 'subtitleLanguage'
for prefix in lang_prefixes:
if lang_word.startswith(prefix):
lang_word = lang_word[len(prefix):]
if not lang_word in LNG_COMMON_WORDS:
try:
lang = Language(lang_word)
# Keep language with alpha2 equilavent. Others are probably an uncommon language.
if lang == 'mul' or hasattr(lang, 'alpha2'):
valid_words.append((key, lang, lang_word, word))
except babelfish.Error:
pass
return valid_words
def search_language(string, lang_filter=None):
"""Looks for language patterns, and if found return the language object, """Looks for language patterns, and if found return the language object,
its group span and an associated confidence. its group span and an associated confidence.
you can specify a list of allowed languages using the lang_filter argument, you can specify a list of allowed languages using the lang_filter argument,
as in lang_filter = [ 'fr', 'eng', 'spanish' ] as in lang_filter = [ 'fr', 'eng', 'spanish' ]
>>> search_language('movie [en].avi')['language'] >>> search_language('movie [en].avi')
Language(English) (Language(English), (7, 9), 0.8)
>>> search_language('the zen fat cat and the gay mad men got a new fan', lang_filter = ['en', 'fr', 'es']) >>> search_language('the zen fat cat and the gay mad men got a new fan', lang_filter = ['en', 'fr', 'es'])
(None, None, None)
""" """
# list of common words which could be interpreted as languages, but which
# are far too common to be able to say they represent a language in the
# middle of a string (where they most likely carry their commmon meaning)
lng_common_words = frozenset([
# english words
'is', 'it', 'am', 'mad', 'men', 'man', 'run', 'sin', 'st', 'to',
'no', 'non', 'war', 'min', 'new', 'car', 'day', 'bad', 'bat', 'fan',
'fry', 'cop', 'zen', 'gay', 'fat', 'cherokee', 'got', 'an', 'as',
'cat', 'her', 'be', 'hat', 'sun', 'may', 'my', 'mr', 'rum', 'pi',
# french words
'bas', 'de', 'le', 'son', 'vo', 'vf', 'ne', 'ca', 'ce', 'et', 'que',
'mal', 'est', 'vol', 'or', 'mon', 'se',
# spanish words
'la', 'el', 'del', 'por', 'mar',
# other
'ind', 'arw', 'ts', 'ii', 'bin', 'chan', 'ss', 'san', 'oss', 'iii',
'vi', 'ben', 'da', 'lt'
])
sep = r'[](){} \._-+'
if lang_filter: if lang_filter:
lang_filter = set(babelfish.Language.fromguessit(lang) for lang in lang_filter) lang_filter = lang_set(lang_filter)
confidence = 1.0 # for all of them slow = ' %s ' % string.lower()
confidence = 1.0 # for all of them
for prop, language, lang, word in find_possible_languages(string): for lang in set(find_words(slow)) & lng_all_names:
pos = string.find(word)
end = pos + len(word)
if lang_filter and language not in lang_filter: if lang in lng_common_words:
continue continue
# only allow those languages that have a 2-letter code, those that pos = slow.find(lang)
# don't are too esoteric and probably false matches
#if language.lang not in lng3_to_lng2:
# continue
# confidence depends on alpha2, alpha3, english name, ... if pos != -1:
if len(lang) == 2: end = pos + len(lang)
confidence = 0.8
elif len(lang) == 3: # skip if span in in skip list
confidence = 0.9 while skip and (pos - 1, end - 1) in skip:
elif prop == 'subtitleLanguage': pos = slow.find(lang, end)
confidence = 0.6 # Subtitle prefix found with language if pos == -1:
else: continue
# Note: we could either be really confident that we found a end = pos + len(lang)
# language or assume that full language names are too if pos == -1:
# common words and lower their confidence accordingly continue
confidence = 0.3 # going with the low-confidence route here
# make sure our word is always surrounded by separators
if slow[pos - 1] not in sep or slow[end] not in sep:
continue
return Guess({prop: language}, confidence=confidence, input=string, span=(pos, end)) language = Language(slow[pos:end])
if lang_filter and language not in lang_filter:
continue
return None # only allow those languages that have a 2-letter code, those that
# don't are too esoteric and probably false matches
if language.lang not in lng3_to_lng2:
continue
# confidence depends on lng2, lng3, english name, ...
if len(lang) == 2:
confidence = 0.8
elif len(lang) == 3:
confidence = 0.9
else:
# Note: we could either be really confident that we found a
# language or assume that full language names are too
# common words and lower their confidence accordingly
confidence = 0.3 # going with the low-confidence route here
return language, (pos - 1, end - 1), confidence
return None, None, None
def guess_language(text): # pragma: no cover def guess_language(text):
"""Guess the language in which a body of text is written. """Guess the language in which a body of text is written.
This uses the external guess-language python module, and will fail and return This uses the external guess-language python module, and will fail and return
@ -393,7 +392,7 @@ def guess_language(text): # pragma: no cover
""" """
try: try:
from guess_language import guessLanguage from guess_language import guessLanguage
return babelfish.Language.fromguessit(guessLanguage(text)) return Language(guessLanguage(text))
except ImportError: except ImportError:
log.error('Cannot detect the language of the given text body, missing dependency: guess-language') log.error('Cannot detect the language of the given text body, missing dependency: guess-language')

View file

@ -2,8 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GuessIt - A library for guessing information from filenames # GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com> # Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
# #
# GuessIt is free software; you can redistribute it and/or modify it under # GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by # the terms of the Lesser GNU General Public License as published by
@ -19,229 +18,163 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, \ from __future__ import unicode_literals
unicode_literals from guessit import PY3, u, base_text_type
import logging
from guessit import PY3, u
from guessit.transfo import TransformerException
from guessit.matchtree import MatchTree from guessit.matchtree import MatchTree
from guessit.textutils import normalize_unicode, clean_string from guessit.textutils import normalize_unicode, clean_string
from guessit.guess import Guess import logging
import inspect
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
class IterativeMatcher(object): class IterativeMatcher(object):
"""An iterative matcher tries to match different patterns that appear def __init__(self, filename, filetype='autodetect', opts=None, transfo_opts=None):
in the filename. """An iterative matcher tries to match different patterns that appear
in the filename.
The ``filetype`` argument indicates which type of file you want to match. The 'filetype' argument indicates which type of file you want to match.
If it is undefined, the matcher will try to see whether it can guess If it is 'autodetect', the matcher will try to see whether it can guess
that the file corresponds to an episode, or otherwise will assume it is that the file corresponds to an episode, or otherwise will assume it is
a movie. a movie.
The recognized ``filetype`` values are: The recognized 'filetype' values are:
``['subtitle', 'info', 'movie', 'moviesubtitle', 'movieinfo', 'episode', [ autodetect, subtitle, info, movie, moviesubtitle, movieinfo, episode,
'episodesubtitle', 'episodeinfo']`` episodesubtitle, episodeinfo ]
``options`` is a dict of options values to be passed to the transformations used
by the matcher.
The IterativeMatcher works mainly in 2 steps: The IterativeMatcher works mainly in 2 steps:
First, it splits the filename into a match_tree, which is a tree of groups First, it splits the filename into a match_tree, which is a tree of groups
which have a semantic meaning, such as episode number, movie title, which have a semantic meaning, such as episode number, movie title,
etc... etc...
The match_tree created looks like the following:: The match_tree created looks like the following:
0000000000000000000000000000000000000000000000000000000000000000000000000000000000 111 0000000000000000000000000000000000000000000000000000000000000000000000000000000000 111
0000011111111111112222222222222233333333444444444444444455555555666777777778888888 000 0000011111111111112222222222222233333333444444444444444455555555666777777778888888 000
0000000000000000000000000000000001111112011112222333333401123334000011233340000000 000 0000000000000000000000000000000001111112011112222333333401123334000011233340000000 000
__________________(The.Prestige).______.[____.HP.______.{__-___}.St{__-___}.Chaps].___ __________________(The.Prestige).______.[____.HP.______.{__-___}.St{__-___}.Chaps].___
xxxxxttttttttttttt ffffff vvvv xxxxxx ll lll xx xxx ccc xxxxxttttttttttttt ffffff vvvv xxxxxx ll lll xx xxx ccc
[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv [XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv
The first 3 lines indicates the group index in which a char in the The first 3 lines indicates the group index in which a char in the
filename is located. So for instance, ``x264`` (in the middle) is the group (0, 4, 1), and filename is located. So for instance, x264 is the group (0, 4, 1), and
it corresponds to a video codec, denoted by the letter ``v`` in the 4th line. it corresponds to a video codec, denoted by the letter'v' in the 4th line.
(for more info, see guess.matchtree.to_string) (for more info, see guess.matchtree.to_string)
Second, it tries to merge all this information into a single object Second, it tries to merge all this information into a single object
containing all the found properties, and does some (basic) conflict containing all the found properties, and does some (basic) conflict
resolution when they arise. resolution when they arise.
"""
def __init__(self, filename, options=None, **kwargs):
options = dict(options or {}) When you create the Matcher, you can pass it:
for k, v in kwargs.items(): - a list 'opts' of option names, that act as global flags
if k not in options or not options[k]: - a dict 'transfo_opts' of { transfo_name: (transfo_args, transfo_kwargs) }
options[k] = v # options dict has priority over keyword arguments with which to call the transfo.process() function.
self._validate_options(options) """
valid_filetypes = ('autodetect', 'subtitle', 'info', 'video',
'movie', 'moviesubtitle', 'movieinfo',
'episode', 'episodesubtitle', 'episodeinfo')
if filetype not in valid_filetypes:
raise ValueError("filetype needs to be one of %s" % valid_filetypes)
if not PY3 and not isinstance(filename, unicode): if not PY3 and not isinstance(filename, unicode):
log.warning('Given filename to matcher is not unicode...') log.warning('Given filename to matcher is not unicode...')
filename = filename.decode('utf-8') filename = filename.decode('utf-8')
filename = normalize_unicode(filename) filename = normalize_unicode(filename)
if opts is None:
opts = []
if not isinstance(opts, list):
raise ValueError('opts must be a list of option names! Received: type=%s val=%s',
type(opts), opts)
if transfo_opts is None:
transfo_opts = {}
if not isinstance(transfo_opts, dict):
raise ValueError('transfo_opts must be a dict of { transfo_name: (args, kwargs) }. '+
'Received: type=%s val=%s', type(transfo_opts), transfo_opts)
self.match_tree = MatchTree(filename) self.match_tree = MatchTree(filename)
self.options = options
self._transfo_calls = []
# sanity check: make sure we don't process a (mostly) empty string # sanity check: make sure we don't process a (mostly) empty string
if clean_string(filename) == '': if clean_string(filename) == '':
return return
from guessit.plugins import transformers mtree = self.match_tree
mtree.guess.set('type', filetype, confidence=1.0)
try: def apply_transfo(transfo_name, *args, **kwargs):
mtree = self.match_tree transfo = __import__('guessit.transfo.' + transfo_name,
if 'type' in self.options: globals=globals(), locals=locals(),
mtree.guess.set('type', self.options['type'], confidence=0.0) fromlist=['process'], level=0)
default_args, default_kwargs = transfo_opts.get(transfo_name, ((), {}))
all_args = args or default_args
all_kwargs = dict(default_kwargs)
all_kwargs.update(kwargs) # keep all kwargs merged together
transfo.process(mtree, *all_args, **all_kwargs)
# Process # 1- first split our path into dirs + basename + ext
for transformer in transformers.all_transformers(): apply_transfo('split_path_components')
self._process(transformer, False)
# Post-process # 2- guess the file type now (will be useful later)
for transformer in transformers.all_transformers(): apply_transfo('guess_filetype', filetype)
self._process(transformer, True) if mtree.guess['type'] == 'unknown':
return
log.debug('Found match tree:\n%s' % u(mtree)) # 3- split each of those into explicit groups (separated by parentheses
except TransformerException as e: # or square brackets)
log.debug('An error has occured in Transformer %s: %s' % (e.transformer, e)) apply_transfo('split_explicit_groups')
def _process(self, transformer, post=False): # 4- try to match information for specific patterns
if not hasattr(transformer, 'should_process') or transformer.should_process(self.match_tree, self.options): # NOTE: order needs to comply to the following:
if post: # - website before language (eg: tvu.org.ru vs russian)
transformer.post_process(self.match_tree, self.options) # - language before episodes_rexps
else: # - properties before language (eg: he-aac vs hebrew)
transformer.process(self.match_tree, self.options) # - release_group before properties (eg: XviD-?? vs xvid)
self._transfo_calls.append(transformer) if mtree.guess['type'] in ('episode', 'episodesubtitle', 'episodeinfo'):
strategy = [ 'guess_date', 'guess_website', 'guess_release_group',
'guess_properties', 'guess_language',
'guess_video_rexps',
'guess_episodes_rexps', 'guess_weak_episodes_rexps' ]
else:
strategy = [ 'guess_date', 'guess_website', 'guess_release_group',
'guess_properties', 'guess_language',
'guess_video_rexps' ]
@property if 'nolanguage' in opts:
def second_pass_options(self): strategy.remove('guess_language')
second_pass_options = {}
for transformer in self._transfo_calls:
if hasattr(transformer, 'second_pass_options'):
transformer_second_pass_options = transformer.second_pass_options(self.match_tree, self.options)
if transformer_second_pass_options:
second_pass_options.update(transformer_second_pass_options)
return second_pass_options
def _validate_options(self, options): for name in strategy:
valid_filetypes = ('subtitle', 'info', 'video', apply_transfo(name)
'movie', 'moviesubtitle', 'movieinfo',
'episode', 'episodesubtitle', 'episodeinfo')
type = options.get('type') # more guessers for both movies and episodes
if type and type not in valid_filetypes: apply_transfo('guess_bonus_features')
raise ValueError("filetype needs to be one of %s" % valid_filetypes) apply_transfo('guess_year', skip_first_year=('skip_first_year' in opts))
if 'nocountry' not in opts:
apply_transfo('guess_country')
apply_transfo('guess_idnumber')
# split into '-' separated subgroups (with required separator chars
# around the dash)
apply_transfo('split_on_dash')
# 5- try to identify the remaining unknown groups by looking at their
# position relative to other known elements
if mtree.guess['type'] in ('episode', 'episodesubtitle', 'episodeinfo'):
apply_transfo('guess_episode_info_from_position')
else:
apply_transfo('guess_movie_title_from_position')
# 6- perform some post-processing steps
apply_transfo('post_process')
log.debug('Found match tree:\n%s' % u(mtree))
def matched(self): def matched(self):
return self.match_tree.matched() return self.match_tree.matched()
def found_property(node, name, value=None, confidence=1.0, update_guess=True, logger=None):
# automatically retrieve the log object from the caller frame
if not logger:
caller_frame = inspect.stack()[1][0]
logger = caller_frame.f_locals['self'].log
guess = Guess({name: node.clean_value if value is None else value}, confidence=confidence)
return found_guess(node, guess, update_guess=update_guess, logger=logger)
def found_guess(node, guess, update_guess=True, logger=None):
if node.guess:
if update_guess:
node.guess.update_highest_confidence(guess)
else:
child = node.add_child(guess.metadata().span)
child.guess = guess
else:
node.guess = guess
log_found_guess(guess, logger)
return node.guess
def log_found_guess(guess, logger=None):
for k, v in guess.items():
(logger or log).debug('Property found: %s=%s (confidence=%.2f)' % (k, v, guess.confidence(k)))
class GuessFinder(object):
def __init__(self, guess_func, confidence=None, logger=None, options=None):
self.guess_func = guess_func
self.confidence = confidence
self.logger = logger or log
self.options = options
def process_nodes(self, nodes):
for node in nodes:
self.process_node(node)
def process_node(self, node, iterative=True, partial_span=None):
value = None
if partial_span:
value = node.value[partial_span[0]:partial_span[1]]
else:
value = node.value
string = ' %s ' % value # add sentinels
if not self.options:
matcher_result = self.guess_func(string, node)
else:
matcher_result = self.guess_func(string, node, self.options)
if matcher_result:
if not isinstance(matcher_result, Guess):
result, span = matcher_result
else:
result, span = matcher_result, matcher_result.metadata().span
if result:
# readjust span to compensate for sentinels
span = (span[0] - 1, span[1] - 1)
# readjust span to compensate for partial_span
if partial_span:
span = (span[0] + partial_span[0], span[1] + partial_span[0])
partition_spans = None
if self.options and 'skip_nodes' in self.options:
skip_nodes = self.options.get('skip_nodes')
for skip_node in skip_nodes:
if skip_node.parent.node_idx == node.node_idx[:len(skip_node.parent.node_idx)] and\
skip_node.span == span:
partition_spans = node.get_partition_spans(skip_node.span)
partition_spans.remove(skip_node.span)
break
if not partition_spans:
# restore sentinels compensation
guess = None
if isinstance(result, Guess):
guess = result
else:
guess = Guess(result, confidence=self.confidence, input=string, span=span)
if not iterative:
node.guess.update(guess)
else:
absolute_span = (span[0] + node.offset, span[1] + node.offset)
node.partition(span)
found_child = None
for child in node.children:
if child.span == absolute_span:
found_guess(child, guess, self.logger)
found_child = child
break
for child in node.children:
if not child is found_child:
self.process_node(child)
else:
for partition_span in partition_spans:
self.process_node(node, partial_span=partition_span)

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GuessIt - A library for guessing information from filenames # GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com> # Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
# #
# GuessIt is free software; you can redistribute it and/or modify it under # GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by # the terms of the Lesser GNU General Public License as published by
@ -18,14 +18,12 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import unicode_literals
from guessit import UnicodeMixin, base_text_type, Guess
import guessit # @UnusedImport needed for doctests
from guessit import UnicodeMixin, base_text_type
from guessit.textutils import clean_string, str_fill from guessit.textutils import clean_string, str_fill
from guessit.patterns import group_delimiters from guessit.patterns import group_delimiters
from guessit.guess import (merge_similar_guesses, merge_all, from guessit.guess import (merge_similar_guesses, merge_all,
choose_int, choose_string, Guess) choose_int, choose_string)
import copy import copy
import logging import logging
@ -33,45 +31,8 @@ log = logging.getLogger(__name__)
class BaseMatchTree(UnicodeMixin): class BaseMatchTree(UnicodeMixin):
"""A BaseMatchTree is a tree covering the filename, where each """A MatchTree represents the hierarchical split of a string into its
node represents a substring in the filename and can have a ``Guess`` constituent semantic groups."""
associated with it that contains the information that has been guessed
in this node. Nodes can be further split into subnodes until a proper
split has been found.
Each node has the following attributes:
- string = the original string of which this node represents a region
- span = a pair of (begin, end) indices delimiting the substring
- parent = parent node
- children = list of children nodes
- guess = Guess()
BaseMatchTrees are displayed in the following way:
>>> path = 'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv'
>>> print(guessit.IterativeMatcher(path).match_tree)
000000 1111111111111111 2222222222222222222222222222222222222222222 333
000000 0000000000111111 0000000000111111222222222222222222222222222 000
011112 011112000011111222222222222222222 000
011112222222222222
0000011112222
01112 0111
Movies/__________(____)/Dark.City.(____).DC._____.____.___.____-___.___
tttttttttt yyyy yyyy fffff ssss aaa vvvv rrr ccc
Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv
The last line contains the filename, which you can use a reference.
The previous line contains the type of property that has been found.
The line before that contains the filename, where all the found groups
have been blanked. Basically, what is left on this line are the leftover
groups which could not be identified.
The lines before that indicate the indices of the groups in the tree.
For instance, the part of the filename 'BDRip' is the leaf with index
``(2, 2, 1)`` (read from top to bottom), and its meaning is 'format'
(as shown by the ``f``'s on the last-but-one line).
"""
def __init__(self, string='', span=None, parent=None): def __init__(self, string='', span=None, parent=None):
self.string = string self.string = string
@ -82,14 +43,10 @@ class BaseMatchTree(UnicodeMixin):
@property @property
def value(self): def value(self):
"""Return the substring that this node matches."""
return self.string[self.span[0]:self.span[1]] return self.string[self.span[0]:self.span[1]]
@property @property
def clean_value(self): def clean_value(self):
"""Return a cleaned value of the matched substring, with better
presentation formatting (punctuation marks removed, duplicate
spaces, ...)"""
return clean_string(self.value) return clean_string(self.value)
@property @property
@ -98,8 +55,6 @@ class BaseMatchTree(UnicodeMixin):
@property @property
def info(self): def info(self):
"""Return a dict containing all the info guessed by this node,
subnodes included."""
result = dict(self.guess) result = dict(self.guess)
for c in self.children: for c in self.children:
@ -109,7 +64,6 @@ class BaseMatchTree(UnicodeMixin):
@property @property
def root(self): def root(self):
"""Return the root node of the tree."""
if not self.parent: if not self.parent:
return self return self
@ -117,43 +71,28 @@ class BaseMatchTree(UnicodeMixin):
@property @property
def depth(self): def depth(self):
"""Return the depth of this node."""
if self.is_leaf(): if self.is_leaf():
return 0 return 0
return 1 + max(c.depth for c in self.children) return 1 + max(c.depth for c in self.children)
def is_leaf(self): def is_leaf(self):
"""Return whether this node is a leaf or not."""
return self.children == [] return self.children == []
def add_child(self, span): def add_child(self, span):
"""Add a new child node to this node with the given span."""
child = MatchTree(self.string, span=span, parent=self) child = MatchTree(self.string, span=span, parent=self)
self.children.append(child) self.children.append(child)
return child
def get_partition_spans(self, indices): def partition(self, indices):
"""Return the list of absolute spans for the regions of the original
string defined by splitting this node at the given indices (relative
to this node)"""
indices = sorted(indices) indices = sorted(indices)
if indices[0] != 0: if indices[0] != 0:
indices.insert(0, 0) indices.insert(0, 0)
if indices[-1] != len(self.value): if indices[-1] != len(self.value):
indices.append(len(self.value)) indices.append(len(self.value))
spans = []
for start, end in zip(indices[:-1], indices[1:]): for start, end in zip(indices[:-1], indices[1:]):
spans.append((self.offset + start, self.add_child(span=(self.offset + start,
self.offset + end)) self.offset + end))
return spans
def partition(self, indices):
"""Partition this node by splitting it at the given indices,
relative to this node."""
for partition_span in self.get_partition_spans(indices):
self.add_child(span=partition_span)
def split_on_components(self, components): def split_on_components(self, components):
offset = 0 offset = 0
@ -165,7 +104,6 @@ class BaseMatchTree(UnicodeMixin):
offset = end offset = end
def nodes_at_depth(self, depth): def nodes_at_depth(self, depth):
"""Return all the nodes at a given depth in the tree"""
if depth == 0: if depth == 0:
yield self yield self
@ -175,32 +113,26 @@ class BaseMatchTree(UnicodeMixin):
@property @property
def node_idx(self): def node_idx(self):
"""Return this node's index in the tree, as a tuple.
If this node is the root of the tree, then return ()."""
if self.parent is None: if self.parent is None:
return () return ()
return self.parent.node_idx + (self.parent.children.index(self),) return self.parent.node_idx + (self.parent.children.index(self),)
def node_at(self, idx): def node_at(self, idx):
"""Return the node at the given index in the subtree rooted at
this node."""
if not idx: if not idx:
return self return self
try: try:
return self.children[idx[0]].node_at(idx[1:]) return self.children[idx[0]].node_at(idx[1:])
except IndexError: except:
raise ValueError('Non-existent node index: %s' % (idx,)) raise ValueError('Non-existent node index: %s' % (idx,))
def nodes(self): def nodes(self):
"""Return all the nodes and subnodes in this tree."""
yield self yield self
for child in self.children: for child in self.children:
for node in child.nodes(): for node in child.nodes():
yield node yield node
def _leaves(self): def _leaves(self):
"""Return a generator over all the nodes that are leaves."""
if self.is_leaf(): if self.is_leaf():
yield self yield self
else: else:
@ -209,73 +141,10 @@ class BaseMatchTree(UnicodeMixin):
for leaf in child._leaves(): for leaf in child._leaves():
yield leaf yield leaf
def group_node(self):
return self._other_group_node(0)
def previous_group_node(self):
return self._other_group_node(-1)
def next_group_node(self):
return self._other_group_node(+1)
def _other_group_node(self, offset):
if len(self.node_idx) > 1:
group_idx = self.node_idx[:2]
if group_idx[1] + offset >= 0:
other_group_idx = (group_idx[0], group_idx[1] + offset)
try:
other_group_node = self.root.node_at(other_group_idx)
return other_group_node
except ValueError:
pass
return None
def leaves(self): def leaves(self):
"""Return a list of all the nodes that are leaves."""
return list(self._leaves()) return list(self._leaves())
def previous_leaf(self, leaf):
"""Return previous leaf for this node"""
return self._other_leaf(leaf, -1)
def next_leaf(self, leaf):
"""Return next leaf for this node"""
return self._other_leaf(leaf, +1)
def _other_leaf(self, leaf, offset):
leaves = self.leaves()
index = leaves.index(leaf) + offset
if index > 0 and index < len(leaves):
return leaves[index]
return None
def previous_leaves(self, leaf):
"""Return previous leaves for this node"""
leaves = self.leaves()
index = leaves.index(leaf)
if index > 0 and index < len(leaves):
previous_leaves = leaves[:index]
previous_leaves.reverse()
return previous_leaves
return []
def next_leaves(self, leaf):
"""Return next leaves for this node"""
leaves = self.leaves()
index = leaves.index(leaf)
if index > 0 and index < len(leaves):
return leaves[index + 1:len(leaves)]
return []
def to_string(self): def to_string(self):
"""Return a readable string representation of this tree.
The result is a multi-line string, where the lines are:
- line 1 -> N-2: each line contains the nodes at the given depth in the tree
- line N-2: original string where all the found groups have been blanked
- line N-1: type of property that has been found
- line N: the original string, which you can use a reference.
"""
empty_line = ' ' * len(self.string) empty_line = ' ' * len(self.string)
def to_hex(x): def to_hex(x):
@ -284,27 +153,23 @@ class BaseMatchTree(UnicodeMixin):
return x return x
def meaning(result): def meaning(result):
mmap = {'episodeNumber': 'E', mmap = { 'episodeNumber': 'E',
'season': 'S', 'season': 'S',
'extension': 'e', 'extension': 'e',
'format': 'f', 'format': 'f',
'language': 'l', 'language': 'l',
'country': 'C', 'country': 'C',
'videoCodec': 'v', 'videoCodec': 'v',
'videoProfile': 'v', 'audioCodec': 'a',
'audioCodec': 'a', 'website': 'w',
'audioProfile': 'a', 'container': 'c',
'audioChannels': 'a', 'series': 'T',
'website': 'w', 'title': 't',
'container': 'c', 'date': 'd',
'series': 'T', 'year': 'y',
'title': 't', 'releaseGroup': 'r',
'date': 'd', 'screenSize': 's'
'year': 'y', }
'releaseGroup': 'r',
'screenSize': 's',
'other': 'o'
}
if result is None: if result is None:
return ' ' return ' '
@ -315,7 +180,7 @@ class BaseMatchTree(UnicodeMixin):
return 'x' return 'x'
lines = [empty_line] * (self.depth + 2) # +2: remaining, meaning lines = [ empty_line ] * (self.depth + 2) # +2: remaining, meaning
lines[-2] = self.string lines[-2] = self.string
for node in self.nodes(): for node in self.nodes():
@ -333,22 +198,16 @@ class BaseMatchTree(UnicodeMixin):
lines.append(self.string) lines.append(self.string)
return '\n'.join(l.rstrip() for l in lines) return '\n'.join(lines)
def __unicode__(self): def __unicode__(self):
return self.to_string() return self.to_string()
def __repr__(self):
return '<MatchTree: root=%s>' % self.value
class MatchTree(BaseMatchTree): class MatchTree(BaseMatchTree):
"""The MatchTree contains a few "utility" methods which are not necessary """The MatchTree contains a few "utility" methods which are not necessary
for the BaseMatchTree, but add a lot of convenience for writing for the BaseMatchTree, but add a lot of convenience for writing
higher-level rules. higher-level rules."""
"""
_matched_result = None
def _unidentified_leaves(self, def _unidentified_leaves(self,
valid=lambda leaf: len(leaf.clean_value) >= 2): valid=lambda leaf: len(leaf.clean_value) >= 2):
@ -358,12 +217,11 @@ class MatchTree(BaseMatchTree):
def unidentified_leaves(self, def unidentified_leaves(self,
valid=lambda leaf: len(leaf.clean_value) >= 2): valid=lambda leaf: len(leaf.clean_value) >= 2):
"""Return a list of leaves that are not empty."""
return list(self._unidentified_leaves(valid)) return list(self._unidentified_leaves(valid))
def _leaves_containing(self, property_name): def _leaves_containing(self, property_name):
if isinstance(property_name, base_text_type): if isinstance(property_name, base_text_type):
property_name = [property_name] property_name = [ property_name ]
for leaf in self._leaves(): for leaf in self._leaves():
for prop in property_name: for prop in property_name:
@ -372,11 +230,9 @@ class MatchTree(BaseMatchTree):
break break
def leaves_containing(self, property_name): def leaves_containing(self, property_name):
"""Return a list of leaves that guessed the given property."""
return list(self._leaves_containing(property_name)) return list(self._leaves_containing(property_name))
def first_leaf_containing(self, property_name): def first_leaf_containing(self, property_name):
"""Return the first leaf containing the given property."""
try: try:
return next(self._leaves_containing(property_name)) return next(self._leaves_containing(property_name))
except StopIteration: except StopIteration:
@ -389,8 +245,6 @@ class MatchTree(BaseMatchTree):
yield leaf yield leaf
def previous_unidentified_leaves(self, node): def previous_unidentified_leaves(self, node):
"""Return a list of non-empty leaves that are before the given
node (in the string)."""
return list(self._previous_unidentified_leaves(node)) return list(self._previous_unidentified_leaves(node))
def _previous_leaves_containing(self, node, property_name): def _previous_leaves_containing(self, node, property_name):
@ -400,8 +254,6 @@ class MatchTree(BaseMatchTree):
yield leaf yield leaf
def previous_leaves_containing(self, node, property_name): def previous_leaves_containing(self, node, property_name):
"""Return a list of leaves containing the given property that are
before the given node (in the string)."""
return list(self._previous_leaves_containing(node, property_name)) return list(self._previous_leaves_containing(node, property_name))
def is_explicit(self): def is_explicit(self):
@ -410,30 +262,26 @@ class MatchTree(BaseMatchTree):
return (self.value[0] + self.value[-1]) in group_delimiters return (self.value[0] + self.value[-1]) in group_delimiters
def matched(self): def matched(self):
"""Return a single guess that contains all the info found in the # we need to make a copy here, as the merge functions work in place and
nodes of this tree, trying to merge properties as good as possible. # calling them on the match tree would modify it
""" parts = [node.guess for node in self.nodes() if node.guess]
if not self._matched_result: parts = copy.deepcopy(parts)
# we need to make a copy here, as the merge functions work in place and
# calling them on the match tree would modify it
parts = [copy.copy(node.guess) for node in self.nodes() if node.guess]
# 1- try to merge similar information together and give it a higher # 1- try to merge similar information together and give it a higher
# confidence # confidence
for int_part in ('year', 'season', 'episodeNumber'): for int_part in ('year', 'season', 'episodeNumber'):
merge_similar_guesses(parts, int_part, choose_int) merge_similar_guesses(parts, int_part, choose_int)
for string_part in ('title', 'series', 'container', 'format', for string_part in ('title', 'series', 'container', 'format',
'releaseGroup', 'website', 'audioCodec', 'releaseGroup', 'website', 'audioCodec',
'videoCodec', 'screenSize', 'episodeFormat', 'videoCodec', 'screenSize', 'episodeFormat',
'audioChannels', 'idNumber'): 'audioChannels', 'idNumber'):
merge_similar_guesses(parts, string_part, choose_string) merge_similar_guesses(parts, string_part, choose_string)
# 2- merge the rest, potentially discarding information not properly # 2- merge the rest, potentially discarding information not properly
# merged before # merged before
result = merge_all(parts, result = merge_all(parts,
append=['language', 'subtitleLanguage', 'other', 'special']) append=['language', 'subtitleLanguage', 'other'])
log.debug('Final result: ' + result.nice_string()) log.debug('Final result: ' + result.nice_string())
self._matched_result = result return result
return self._matched_result

View file

@ -1,25 +0,0 @@
from optparse import OptionParser
def _build_option_parser():
    """Create the command-line parser and register every supported option.

    Options are declared as (flags, settings) pairs and registered in the
    order listed, so the generated ``--help`` output keeps that ordering.
    """
    specs = (
        (('-v', '--verbose'),
         {'action': 'store_true', 'dest': 'verbose', 'default': False,
          'help': 'display debug output'}),
        (('-p', '--properties'),
         {'dest': 'properties', 'action': 'store_true', 'default': False,
          'help': 'Display properties that can be guessed.'}),
        (('-l', '--values'),
         {'dest': 'values', 'action': 'store_true', 'default': False,
          'help': 'Display property values that can be guessed.'}),
        (('-s', '--transformers'),
         {'dest': 'transformers', 'action': 'store_true', 'default': False,
          'help': 'Display transformers that can be used.'}),
        # value option (default "store" action): which information to use
        (('-i', '--info'),
         {'dest': 'info', 'default': 'filename',
          'help': 'the desired information type: filename, hash_mpc or a hash from python\'s '
                  'hashlib module, such as hash_md5, hash_sha1, ...; or a list of any of '
                  'them, comma-separated'}),
        (('-n', '--name-only'),
         {'dest': 'name_only', 'action': 'store_true', 'default': False,
          'help': 'Parse files as name only. Disable folder parsing, extension parsing, and file content analysis.'}),
        # value option: left as None when the user does not supply it
        (('-t', '--type'),
         {'dest': 'type', 'default': None,
          'help': 'the suggested file type: movie, episode. If undefined, type will be guessed.'}),
        (('-a', '--advanced'),
         {'dest': 'advanced', 'action': 'store_true', 'default': False,
          'help': 'display advanced information for filename guesses, as json output'}),
        (('-y', '--yaml'),
         {'dest': 'yaml', 'action': 'store_true', 'default': False,
          'help': 'display information for filename guesses as yaml output (like unit-test)'}),
        (('-d', '--demo'),
         {'action': 'store_true', 'dest': 'demo', 'default': False,
          'help': 'run a few builtin tests instead of analyzing a file'}),
    )

    parser = OptionParser(usage='usage: %prog [options] file1 [file2...]')
    for flags, settings in specs:
        parser.add_option(*flags, **settings)
    return parser


# Module-level parser instance, built once at import time.
option_parser = _build_option_parser()

250
lib/guessit/patterns.py Normal file
View file

@ -0,0 +1,250 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2011 Ricard Marxer <ricardmp@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
import re
# File extensions (without the leading dot), grouped by the kind of file
# they denote.
subtitle_exts = [ 'srt', 'idx', 'sub', 'ssa' ]
info_exts = [ 'nfo' ]
video_exts = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2',
              'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm',
              'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv']
# each entry is a two-character string: opening delimiter, closing delimiter
group_delimiters = [ '()', '[]', '{}' ]
# separator character regexp
# (a single character class; find_properties also uses this string as a plain
# sequence of characters for membership tests)
sep = r'[][,)(}{+ /\._-]' # regexp art, hehe :D
# character used to represent a deleted char (when matching groups)
deleted = '_'
# Regexps used to detect season / episode numbers.
# format: [ (regexp, confidence, span_adjust) ]
# NOTE(review): span_adjust looks like a (start, end) offset applied to the
# match span so that the context characters some patterns consume (leading
# separator, trailing non-digit) are not counted -- confirm against the caller.
episode_rexps = [ # ... Season 2 ...
                  (r'season (?P<season>[0-9]+)', 1.0, (0, 0)),
                  (r'saison (?P<season>[0-9]+)', 1.0, (0, 0)),
                  # ... s02e13 ...
                  (r'[Ss](?P<season>[0-9]{1,3})[^0-9]?(?P<episodeNumber>(?:-?[eE-][0-9]{1,3})+)[^0-9]', 1.0, (0, -1)),
                  # ... s03-x02 ... # FIXME: redundant? remove it?
                  #(r'[Ss](?P<season>[0-9]{1,3})[^0-9]?(?P<bonusNumber>(?:-?[xX-][0-9]{1,3})+)[^0-9]', 1.0, (0, -1)),
                  # ... 2x13 ...
                  (r'[^0-9](?P<season>[0-9]{1,2})[^0-9 .-]?(?P<episodeNumber>(?:-?[xX][0-9]{1,3})+)[^0-9]', 1.0, (1, -1)),
                  # ... s02 ...
                  #(sep + r's(?P<season>[0-9]{1,2})' + sep, 0.6, (1, -1)),
                  (r's(?P<season>[0-9]{1,2})[^0-9]', 0.6, (0, -1)),
                  # v2 or v3 for some mangas which have multiples rips
                  (r'(?P<episodeNumber>[0-9]{1,3})v[23]' + sep, 0.6, (0, 0)),
                  # ... ep 23 ...
                  ('ep' + sep + r'(?P<episodeNumber>[0-9]{1,2})[^0-9]', 0.7, (0, -1)),
                  # ... e13 ... for a mini-series without a season number
                  (sep + r'e(?P<episodeNumber>[0-9]{1,2})' + sep, 0.6, (1, -1))
                  ]
# Unlike episode_rexps, entries here are 2-tuples: (regexp, span_adjust),
# i.e. they carry no confidence value.
weak_episode_rexps = [ # ... 213 or 0106 ...
                       (sep + r'(?P<episodeNumber>[0-9]{2,4})' + sep, (1, -1))
                       ]
# NOTE(review): presumably words that must not be taken for an episode
# title -- confirm against the code that reads this list.
non_episode_title = [ 'extras', 'rip' ]
# Regexps for generic video properties; entries are 3-tuples whose first
# element is the regexp (the named groups give the property names).
# NOTE(review): the 2nd and 3rd elements appear to follow the same
# (confidence, span_adjust) layout as episode_rexps -- confirm.
video_rexps = [ # cd number
                (r'cd ?(?P<cdNumber>[0-9])( ?of ?(?P<cdNumberTotal>[0-9]))?', 1.0, (0, 0)),
                (r'(?P<cdNumberTotal>[1-9]) cds?', 0.9, (0, 0)),
                # special editions
                (r'edition' + sep + r'(?P<edition>collector)', 1.0, (0, 0)),
                (r'(?P<edition>collector)' + sep + 'edition', 1.0, (0, 0)),
                (r'(?P<edition>special)' + sep + 'edition', 1.0, (0, 0)),
                (r'(?P<edition>criterion)' + sep + 'edition', 1.0, (0, 0)),
                # director's cut
                (r"(?P<edition>director'?s?" + sep + "cut)", 1.0, (0, 0)),
                # video size
                (r'(?P<width>[0-9]{3,4})x(?P<height>[0-9]{3,4})', 0.9, (0, 0)),
                # website
                (r'(?P<website>www(\.[a-zA-Z0-9]+){2,3})', 0.8, (0, 0)),
                # bonusNumber: ... x01 ...
                (r'x(?P<bonusNumber>[0-9]{1,2})', 1.0, (0, 0)),
                # filmNumber: ... f01 ...
                (r'f(?P<filmNumber>[0-9]{1,2})', 1.0, (0, 0))
                ]
# literal website names (not regexps)
websites = [ 'tvu.org.ru', 'emule-island.com', 'UsaBit.com', 'www.divx-overnet.com',
             'sharethefiles.com' ]
# NOTE(review): presumably words that are unlikely to be a series name -- confirm.
unlikely_series = [ 'series' ]
# prop_multi is a dict of { property_name: { canonical_form: [ pattern ] } }
# pattern is a string considered as a regexp, with the addition that dashes are
# replaced with '[-. _]?' (see _psep), which matches more types of separators
# (or none)
# note: simpler patterns need to be at the end of the list to not shadow more
# complete ones, eg: 'AAC' needs to come after 'He-AAC'
# ie: from most specific to least specific
# Properties whose canonical value can be written in several ways in a
# filename: { property_name: { canonical_form: [ pattern, ... ] } }.
# NOTE(review): a few lists put the shorter pattern first (e.g. 'DVD' before
# 'DVD-Rip'), which is the opposite of the "most specific first" ordering
# the surrounding comments ask for -- confirm whether that is intentional.
prop_multi = { 'format': { 'DVD': [ 'DVD', 'DVD-Rip', 'VIDEO-TS', 'DVDivX' ],
                           'HD-DVD': [ 'HD-(?:DVD)?-Rip', 'HD-DVD' ],
                           'BluRay': [ 'Blu-ray', 'B[DR]Rip' ],
                           'HDTV': [ 'HD-TV' ],
                           'DVB': [ 'DVB-Rip', 'DVB', 'PD-TV' ],
                           'WEBRip': [ 'WEB-Rip' ],
                           'Screener': [ 'DVD-SCR', 'Screener' ],
                           'VHS': [ 'VHS' ],
                           'WEB-DL': [ 'WEB-DL' ] },

               # the canonical form here is the boolean True, not a string
               'is3D': { True: [ '3D' ] },

               'screenSize': { '480p': [ '480[pi]?' ],
                               '720p': [ '720[pi]?' ],
                               '1080i': [ '1080i' ],
                               '1080p': [ '1080p', '1080[^i]' ] },

               'videoCodec': { 'XviD': [ 'Xvid' ],
                               'DivX': [ 'DVDivX', 'DivX' ],
                               'h264': [ '[hx]-264' ],
                               'Rv10': [ 'Rv10' ],
                               'Mpeg2': [ 'Mpeg2' ] },

               # has nothing to do here (or on filenames for that matter), but some
               # releases use it and it helps to identify release groups, so we adapt
               'videoApi': { 'DXVA': [ 'DXVA' ] },

               'audioCodec': { 'AC3': [ 'AC3' ],
                               'DTS': [ 'DTS' ],
                               'AAC': [ 'He-AAC', 'AAC-He', 'AAC' ] },

               'audioChannels': { '5.1': [ r'5\.1', 'DD5[._ ]1', '5ch' ] },

               'episodeFormat': { 'Minisode': [ 'Minisodes?' ] }

               }
# prop_single dict of { property_name: [ canonical_form ] }
# Each canonical form doubles as its own (single) pattern, so entries are
# regexps too: note the escaped brackets in r'\[XCT\]'.
prop_single = { 'releaseGroup': [ 'ESiR', 'WAF', 'SEPTiC', r'\[XCT\]', 'iNT', 'PUKKA',
                                  'CHD', 'ViTE', 'TLF', 'FLAiTE',
                                  'MDX', 'GM4F', 'DVL', 'SVD', 'iLUMiNADOS',
                                  'aXXo', 'KLAXXON', 'NoTV', 'ZeaL', 'LOL',
                                  'CtrlHD', 'POD', 'WiKi','IMMERSE', 'FQM',
                                  '2HD', 'CTU', 'HALCYON', 'EbP', 'SiTV',
                                  'HDBRiSe', 'AlFleNi-TeaM', 'EVOLVE', '0TV',
                                  'TLA', 'NTB', 'ASAP', 'MOMENTUM', 'FoV', 'D-Z0N3',
                                  'TrollHD', 'ECI'
                                  ],

                # potentially confusing release group names (they are words)
                # (find_properties skips this property entirely)
                'weakReleaseGroup': [ 'DEiTY', 'FiNaLe', 'UnSeeN', 'KiNGS', 'CLUE', 'DIMENSION',
                                      'SAiNTS', 'ARROW', 'EuReKA', 'SiNNERS', 'DiRTY', 'REWARD',
                                      'REPTiLE',
                                      ],

                'other': [ 'PROPER', 'REPACK', 'LIMITED', 'DualAudio', 'Audiofixed', 'R5',
                           'complete', 'classic', # not so sure about these ones, could appear in a title
                           'ws' ] # widescreen
                }
# character that stands for "optional separator" inside property patterns
_dash = '-'
# regexp fragment substituted for every dash: one optional separator char
_psep = '[-. _]?'


def _to_rexp(prop):
    """Compile a property pattern into a case-insensitive regexp.

    Every dash in ``prop`` is first swapped for ``_psep`` so that the
    pattern tolerates a dash, dot, space, underscore or nothing at all at
    that position.
    """
    relaxed = _psep.join(prop.split(_dash))
    return re.compile(relaxed, re.IGNORECASE)
def _build_properties_rexps():
    """Compile every pattern of prop_multi and prop_single.

    Returns a dict of { property_name: { canonical_form: [ rexp ] } }.
    Entries coming from prop_single replace same-named entries from
    prop_multi, and use the canonical form itself as their only pattern.
    """
    compiled = {}
    for name, forms in prop_multi.items():
        compiled[name] = dict((form, [_to_rexp(pattern) for pattern in patterns])
                              for form, patterns in forms.items())
    for name, forms in prop_single.items():
        compiled[name] = dict((form, [_to_rexp(form)]) for form in forms)
    return compiled


# properties_rexps dict of { property_name: { canonical_form: [ rexp ] } }
# containing the rexps compiled from both prop_multi and prop_single
properties_rexps = _build_properties_rexps()
def find_properties(string):
    """Return the known properties found in ``string``.

    The result is a list of ``(property_name, canonical_form, start, end)``
    tuples, where ``start``/``end`` is the span of the match in ``string``.
    The 'weakReleaseGroup' property is never reported. For each compiled
    pattern only its first occurrence (``search``) is looked at, and it is
    kept only when it is delimited by separators or the string boundaries.
    """
    found = []
    length = len(string)
    for name, forms in properties_rexps.items():
        # FIXME: this should be done in a more flexible way...
        if name == 'weakReleaseGroup':
            continue

        for form, rexps in forms.items():
            for rexp in rexps:
                hit = rexp.search(string)
                if not hit:
                    continue
                start, end = hit.span()
                # make sure our word is always surrounded by separators
                # note: sep is a regexp, but in this case using it as
                #       a char sequence achieves the same goal
                left_ok = start <= 0 or string[start - 1] in sep
                right_ok = end >= length or string[end] in sep
                if left_ok and right_ok:
                    found.append((name, form, start, end))
    return found
# { canonical value: [ synonym, ... ] }
property_synonyms = {
    'Special Edition': ['Special'],
    'Collector Edition': ['Collector'],
    'Criterion Edition': ['Criterion'],
}


def revert_synonyms():
    """Invert property_synonyms into { lower-cased synonym: canonical value }."""
    return dict((synonym.lower(), canonical)
                for canonical, synonyms in property_synonyms.items()
                for synonym in synonyms)


# lookup table computed once at import time
reverse_synonyms = revert_synonyms()


def canonical_form(string):
    """Return the canonical value ``string`` is a synonym of.

    The lookup ignores case; a string that is not a known synonym is
    returned unchanged.
    """
    key = string.lower()
    if key in reverse_synonyms:
        return reverse_synonyms[key]
    return string
# ``basestring`` only exists on Python 2; fall back to ``str`` on Python 3 so
# that the type check below does not raise NameError there.
try:
    _string_types = basestring
except NameError:
    _string_types = str


def compute_canonical_form(property_name, value):
    """Return the canonical form of a property given its type if it is a valid
    one, None otherwise.

    ``value`` is tested with ``match`` (i.e. anchored at its start) against
    every compiled pattern registered for ``property_name``; the canonical
    form owning the first matching pattern is returned. Non-string values
    always yield None.

    Raises KeyError when ``value`` is a string and ``property_name`` is not
    a key of ``properties_rexps``.
    """
    if not isinstance(value, _string_types):
        return None

    for form, rexps in properties_rexps[property_name].items():
        if any(rexp.match(value) for rexp in rexps):
            return form
    return None

View file

@ -1,77 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
import re
from guessit import base_text_type
# each entry is a two-character string: opening delimiter, closing delimiter
group_delimiters = ['()', '[]', '{}']

# separator character regexp
sep = r'[][,)(}:{+ /\._-]'  # regexp art, hehe :D

# character that stands for "optional separator" inside patterns
_dash = '-'
# regexp fragment substituted for every dash; written as a raw string so that
# '\W' is not an invalid string escape (same value as before)
_psep = r'[\W_]?'
def build_or_pattern(patterns):
    """Build a or pattern string from a list of possible patterns

    Every pattern is wrapped in its own non-capturing group and the groups
    are joined with ``|`` inside an outer non-capturing group, e.g.
    ``['a', 'b']`` gives ``'(?:(?:a)|(?:b))'``.

    :param patterns: Patterns to combine (regexps). Any iterable is accepted.
    :type patterns: iterable of strings

    :return: The combined pattern. An empty input yields ``'(?:)'``, a valid
        regexp matching the empty string (it used to yield the unbalanced,
        uncompilable ``')'``).
    :rtype: string
    """
    return '(?:' + '|'.join('(?:%s)' % pattern for pattern in patterns) + ')'
def compile_pattern(pattern, enhance=True):
    """Compile a pattern case-insensitively, optionally enhancing it first.

    :param pattern: Pattern to compile (regexp).
    :type pattern: string
    :param enhance: Whether to pass the pattern through ``enhance_pattern``
        before compiling it.
    :type enhance: boolean

    :return: The compiled pattern
    :rtype: regular expression object
    """
    if enhance:
        pattern = enhance_pattern(pattern)
    return re.compile(pattern, re.IGNORECASE)
def enhance_pattern(pattern):
    """Relax a pattern so that it matches more equivalent spellings.

    Each dash (``_dash``) in the pattern is substituted with ``_psep``, a
    fragment accepting one optional separator character (or none).

    :param pattern: Pattern to enhance (regexp).
    :type pattern: string

    :return: The enhanced pattern
    :rtype: string
    """
    return _psep.join(pattern.split(_dash))

View file

@ -1,32 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
# Copyright (c) 2011 Ricard Marxer <ricardmp@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
# File extensions (without the leading dot), grouped by the kind of file
# they denote.
subtitle_exts = ['srt', 'idx', 'sub', 'ssa']

info_exts = ['nfo']

video_exts = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2',
              'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm',
              'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv',
              'iso']

View file

@ -1,150 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
import re
# Regexp fragments for the three supported numeral notations. Raw strings are
# used wherever the fragment contains a backslash.
digital_numeral = r'\d{1,3}'

roman_numeral = "(?=[MCDLXVI]+)M{0,4}(?:CM|CD|D?C{0,3})(?:XC|XL|L?X{0,3})(?:IX|IV|V?I{0,3})"

# In each list a word's index is the number it spells (0 to 20).
english_word_numeral_list = [
    'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten',
    'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen', 'twenty'
]

french_word_numeral_list = [
    'zéro', 'un', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
    'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dix-sept', 'dix-huit', 'dix-neuf', 'vingt'
]

# same as above, without accents and dashes (and with the feminine 'une')
french_alt_word_numeral_list = [
    'zero', 'une', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
    'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dixsept', 'dixhuit', 'dixneuf', 'vingt'
]


def __build_word_numeral(*args, **kwargs):
    """Build the regexp alternation matching any word of the given lists.

    Positional arguments are word lists; keyword arguments are accepted for
    backward compatibility and ignored. The words are inserted verbatim
    (they are not regexp-escaped). Calling it without any word returns a
    valid pattern instead of failing.
    """
    words = [word for word_list in args for word in word_list]
    return r'(?:(?=\w+)' + '|'.join(words) + ')'


word_numeral = __build_word_numeral(english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list)

# matches a numeral written in any of the three notations
numeral = '(?:' + digital_numeral + '|' + roman_numeral + '|' + word_numeral + ')'

# Roman symbols with their value, ordered from the largest to the smallest so
# that they can be consumed greedily from left to right.
__romanNumeralMap = (
    ('M', 1000),
    ('CM', 900),
    ('D', 500),
    ('CD', 400),
    ('C', 100),
    ('XC', 90),
    ('L', 50),
    ('XL', 40),
    ('X', 10),
    ('IX', 9),
    ('V', 5),
    ('IV', 4),
    ('I', 1)
)

__romanNumeralPattern = re.compile('^' + roman_numeral + '$')


def __parse_roman(value):
    """convert Roman numeral to integer

    Only upper-case numerals are recognised. Raises ValueError when
    ``value`` is not a well-formed Roman numeral.
    """
    if not __romanNumeralPattern.search(value):
        raise ValueError('Invalid Roman numeral: %s' % value)

    result = 0
    index = 0
    for symbol, integer in __romanNumeralMap:
        while value[index:index + len(symbol)] == symbol:
            result += integer
            index += len(symbol)
    return result


def __parse_word(value):
    """Convert Word numeral to integer

    The lookup ignores case, so 'Two' and 'two' both give 2. Raises
    ValueError when ``value`` is not in any of the word lists.
    """
    word = value.lower()
    for word_list in (english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list):
        try:
            return word_list.index(word)
        except ValueError:
            pass
    raise ValueError('Invalid word numeral: %s' % value)


_clean_re = re.compile(r'[^\d]*(\d+)[^\d]*')


def _parse_tokens(parser, value, clean):
    """Apply ``parser`` to ``value``, trying its whitespace-separated tokens first.

    With ``clean`` set, the result for the first token that parses is
    returned; otherwise (or when no token parses) the whole value is
    parsed. ValueError from that last attempt propagates to the caller.
    """
    if clean:
        for token in value.split():
            try:
                return parser(token)
            except ValueError:
                pass
    return parser(value)


def parse_numeral(value, int_enabled=True, roman_enabled=True, word_enabled=True, clean=True):
    """Parse a numeric value into integer.

    input can be an integer as a string, a roman numeral or a word

    The notations are tried in that order; each can be switched off with
    its ``*_enabled`` flag.

    :param value: Value to parse. Can be an integer, roman numeral or word.
    :type value: string
    :param clean: When True, surrounding text is tolerated: the first run of
        digits is used for integers, and the first whitespace-separated
        token that parses is used for roman and word numerals.
    :type clean: boolean

    :return: Numeric value
    :rtype: int
    :raises ValueError: if value can't be parsed by any enabled notation
    """
    if int_enabled:
        try:
            if clean:
                match = _clean_re.match(value)
                if match:
                    return int(match.group(1))
            return int(value)
        except ValueError:
            pass
    if roman_enabled:
        try:
            return _parse_tokens(__parse_roman, value, clean)
        except ValueError:
            pass
    if word_enabled:
        try:
            return _parse_tokens(__parse_word, value, clean)
        except ValueError:
            pass
    raise ValueError('Invalid numeral: ' + value)

View file

@ -1,21 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals

View file

@ -1,186 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from stevedore import ExtensionManager
from pkg_resources import EntryPoint
from stevedore.extension import Extension
from logging import getLogger
log = getLogger(__name__)
class Transformer(object):  # pragma: no cover
    """Base class for transformers; every hook here is a neutral default.

    Subclasses override the hooks they need. ``priority`` is the value the
    extension manager sorts on (higher first).
    """

    def __init__(self, priority=0):
        self.priority = priority
        # one logger per concrete transformer class
        self.log = getLogger(self.name)

    @property
    def name(self):
        """Name of the concrete class."""
        return type(self).__name__

    def supported_properties(self):
        """Properties this transformer can produce; none by default."""
        return dict()

    def second_pass_options(self, mtree, options=None):
        """Options for a second pass; None by default."""
        return None

    def should_process(self, mtree, options=None):
        """Whether ``process`` should run; always True by default."""
        return True

    def process(self, mtree, options=None):
        """Main hook; does nothing by default."""
        return None

    def post_process(self, mtree, options=None):
        """Post-processing hook; does nothing by default."""
        return None

    def rate_quality(self, guess, *props):
        """Quality rating of ``guess``; 0 by default."""
        return 0
class CustomTransformerExtensionManager(ExtensionManager):
    """Extension manager that keeps transformers ordered by priority.

    Plugins are loaded from the ``namespace`` entry-point group and, by
    default, instantiated on load.
    """

    def __init__(self, namespace='guessit.transformer', invoke_on_load=True,
                 invoke_args=(), invoke_kwds=None, propagate_map_exceptions=True,
                 on_load_failure_callback=None, verify_requirements=False):
        # invoke_kwds defaults to None rather than a shared mutable dict;
        # a fresh empty dict is handed to the base class in that case.
        super(CustomTransformerExtensionManager, self).__init__(
            namespace=namespace,
            invoke_on_load=invoke_on_load,
            invoke_args=invoke_args,
            invoke_kwds={} if invoke_kwds is None else invoke_kwds,
            propagate_map_exceptions=propagate_map_exceptions,
            on_load_failure_callback=on_load_failure_callback,
            verify_requirements=verify_requirements)

    def order_extensions(self, extensions):
        """Order the loaded transformers

        It should follow those rules
           - website before language (eg: tvu.org.ru vs russian)
           - language before episodes_rexps
           - properties before language (eg: he-aac vs hebrew)
           - release_group before properties (eg: XviD-?? vs xvid)

        The list is sorted in place by decreasing ``obj.priority`` and
        returned.
        """
        extensions.sort(key=lambda ext: -ext.obj.priority)
        return extensions

    def _load_one_plugin(self, ep, invoke_on_load, invoke_args, invoke_kwds, verify_requirements):
        """Load one entry point and wrap it in an ``Extension``.

        Entry points without a distribution are always loaded with
        ``require=False``; otherwise ``verify_requirements`` decides.
        """
        if not ep.dist:
            plugin = ep.load(require=False)
        else:
            plugin = ep.load(require=verify_requirements)
        if invoke_on_load:
            obj = plugin(*invoke_args, **invoke_kwds)
        else:
            obj = None
        return Extension(ep.name, ep, plugin, obj)

    def _load_plugins(self, invoke_on_load, invoke_args, invoke_kwds, verify_requirements):
        """Load the plugins through the base class, then order them."""
        loaded = super(CustomTransformerExtensionManager, self)._load_plugins(
            invoke_on_load, invoke_args, invoke_kwds, verify_requirements)
        return self.order_extensions(loaded)

    def objects(self):
        """Return the object of every loaded extension."""
        return self.map(self._get_obj)

    def _get_obj(self, ext):
        return ext.obj

    def object(self, name):
        """Return the object of the extension called ``name``, or None."""
        try:
            return self[name].obj
        except KeyError:
            return None

    def register_module(self, name, module_name):
        """Load ``module_name`` as an extra extension called ``name``."""
        ep = EntryPoint(name, module_name)
        # verify_requirements is a required argument of _load_one_plugin and
        # used to be omitted here, which raised TypeError. Its value is not
        # consulted for this entry point, which has no distribution.
        loaded = self._load_one_plugin(ep, invoke_on_load=True, invoke_args=(),
                                       invoke_kwds={}, verify_requirements=False)
        if loaded:
            self.extensions.append(loaded)
            self.extensions = self.order_extensions(self.extensions)
        # reset the by-name cache so the new extension can be looked up
        # NOTE(review): assumes the base class rebuilds it lazily -- confirm
        self._extensions_by_name = None
class DefaultTransformerExtensionManager(CustomTransformerExtensionManager):
    """Manager that also knows the transformers shipped with guessit."""

    @property
    def _internal_entry_points(self):
        """Entry-point specifications of the built-in transformers."""
        return ['split_path_components = guessit.transfo.split_path_components:SplitPathComponents',
                'guess_filetype = guessit.transfo.guess_filetype:GuessFiletype',
                'split_explicit_groups = guessit.transfo.split_explicit_groups:SplitExplicitGroups',
                'guess_date = guessit.transfo.guess_date:GuessDate',
                'guess_website = guessit.transfo.guess_website:GuessWebsite',
                'guess_release_group = guessit.transfo.guess_release_group:GuessReleaseGroup',
                'guess_properties = guessit.transfo.guess_properties:GuessProperties',
                'guess_language = guessit.transfo.guess_language:GuessLanguage',
                'guess_video_rexps = guessit.transfo.guess_video_rexps:GuessVideoRexps',
                'guess_episodes_rexps = guessit.transfo.guess_episodes_rexps:GuessEpisodesRexps',
                'guess_weak_episodes_rexps = guessit.transfo.guess_weak_episodes_rexps:GuessWeakEpisodesRexps',
                'guess_bonus_features = guessit.transfo.guess_bonus_features:GuessBonusFeatures',
                'guess_year = guessit.transfo.guess_year:GuessYear',
                'guess_country = guessit.transfo.guess_country:GuessCountry',
                'guess_idnumber = guessit.transfo.guess_idnumber:GuessIdnumber',
                'split_on_dash = guessit.transfo.split_on_dash:SplitOnDash',
                'guess_episode_info_from_position = guessit.transfo.guess_episode_info_from_position:GuessEpisodeInfoFromPosition',
                'guess_movie_title_from_position = guessit.transfo.guess_movie_title_from_position:GuessMovieTitleFromPosition',
                'guess_episode_special = guessit.transfo.guess_episode_special:GuessEpisodeSpecial']

    def _find_entry_points(self, namespace):
        """Return the entry points of ``namespace``, one per name.

        The built-in ones are only included for the manager's own
        namespace; an entry point found by the base class replaces a
        built-in one carrying the same name.
        """
        by_name = {}

        # Internal entry points
        if namespace == self.namespace:
            for spec in self._internal_entry_points:
                parsed = EntryPoint.parse(spec)
                by_name[parsed.name] = parsed

        # Package entry points
        parent = super(DefaultTransformerExtensionManager, self)
        for packaged in parent._find_entry_points(namespace):
            by_name[packaged.name] = packaged

        return list(by_name.values())
# the active extension manager; set by reload()
_extensions = None


def all_transformers():
    """Return the objects of all loaded transformers."""
    return _extensions.objects()


def get_transformer(name):
    """Return the transformer object registered as ``name``."""
    return _extensions.object(name)


def add_transformer(name, module_name):
    """Register the transformer ``module_name`` under ``name``."""
    _extensions.register_module(name, module_name)


def reload(custom=False):
    """
    Reload extension manager with default or custom one.
    :param custom: if True, custom manager will be used, else default one.
    Default manager will load default extensions from guessit and setuptools packaging extensions
    Custom manager will not load default extensions from guessit, using only setuptools packaging extensions.
    :type custom: boolean
    """
    global _extensions
    manager_class = (CustomTransformerExtensionManager if custom
                     else DefaultTransformerExtensionManager)
    _extensions = manager_class()


# load the default manager as soon as the module is imported
reload()

View file

@ -1,65 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.plugins.transformers import all_transformers
def best_quality_properties(props, *guesses):
    """Retrieve the best quality guess, based on given properties

    Every transformer rates every guess on ``props``; the guess that
    received the single highest rating wins, the earliest one in case of
    a tie.

    :param props: Properties to include in the rating
    :type props: list of strings
    :param guesses: Guesses to rate
    :type guesses: :class:`guessit.guess.Guess`

    :return: Best quality guess from all passed guesses, or None when
        nothing was rated
    :rtype: :class:`guessit.guess.Guess`
    """
    winner, top_rate = None, None
    for candidate in guesses:
        for transformer in all_transformers():
            score = transformer.rate_quality(candidate, *props)
            if top_rate is not None and not (top_rate < score):
                continue
            top_rate, winner = score, candidate
    return winner
def best_quality(*guesses):
    """Retrieve the best quality guess.

    Same selection as ``best_quality_properties``, with no property passed
    to the transformers' ``rate_quality``.

    :param guesses: Guesses to rate
    :type guesses: :class:`guessit.guess.Guess`

    :return: Best quality guess from all passed guesses
    :rtype: :class:`guessit.guess.Guess`
    """
    # an empty property sequence expands to no extra argument at all
    return best_quality_properties((), *guesses)

View file

@ -1,28 +1,28 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GuessIt - A library for guessing information from filenames # Smewt - A smart collection manager
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com> # Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
# #
# GuessIt is free software; you can redistribute it and/or modify it under # Smewt is free software; you can redistribute it and/or modify
# the terms of the Lesser GNU General Public License as published by # it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or # the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version. # (at your option) any later version.
# #
# GuessIt is distributed in the hope that it will be useful, # Smewt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of # but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details. # GNU General Public License for more details.
# #
# You should have received a copy of the Lesser GNU General Public License # You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import unicode_literals
import logging import logging
import sys import sys
import os import os, os.path
GREEN_FONT = "\x1B[0;32m" GREEN_FONT = "\x1B[0;32m"
YELLOW_FONT = "\x1B[0;33m" YELLOW_FONT = "\x1B[0;33m"
@ -31,7 +31,7 @@ RED_FONT = "\x1B[0;31m"
RESET_FONT = "\x1B[0m" RESET_FONT = "\x1B[0m"
def setupLogging(colored=True, with_time=False, with_thread=False, filename=None, with_lineno=False): # pragma: no cover def setupLogging(colored=True, with_time=False, with_thread=False, filename=None, with_lineno=False):
"""Set up a nice colored logger as the main application logger.""" """Set up a nice colored logger as the main application logger."""
class SimpleFormatter(logging.Formatter): class SimpleFormatter(logging.Formatter):

View file

@ -1,26 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
import logging
from guessit.slogging import setupLogging
# Runs at import time: install guessit's logger configuration with its
# default settings, then silence every log call of level INFO or lower.
setupLogging()
logging.disable(logging.INFO)

View file

@ -1,40 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.test import (test_api, test_autodetect, test_autodetect_all, test_doctests,
test_episode, test_hashes, test_language, test_main,
test_matchtree, test_movie, test_quality, test_utils)
from unittest import TextTestRunner
import logging
def main():
    """Run every guessit test suite in turn, with verbose output."""
    modules = (test_api, test_autodetect,
               test_autodetect_all, test_doctests,
               test_episode, test_hashes, test_language,
               test_main, test_matchtree, test_movie,
               test_quality, test_utils)
    suites = [module.suite for module in modules]
    for suite in suites:
        # a fresh runner for each suite
        runner = TextTestRunner(verbosity=2)
        runner.run(suite)


if __name__ == '__main__':
    main()

View file

@ -1,289 +0,0 @@
? Movies/Fear and Loathing in Las Vegas (1998)/Fear.and.Loathing.in.Las.Vegas.720p.HDDVD.DTS.x264-ESiR.mkv
: type: movie
title: Fear and Loathing in Las Vegas
year: 1998
screenSize: 720p
format: HD-DVD
audioCodec: DTS
videoCodec: h264
releaseGroup: ESiR
? Leopard.dmg
: type: unknown
extension: dmg
? Series/Duckman/Duckman - 101 (01) - 20021107 - I, Duckman.avi
: type: episode
series: Duckman
season: 1
episodeNumber: 1
title: I, Duckman
date: 2002-11-07
? Series/Neverwhere/Neverwhere.05.Down.Street.[tvu.org.ru].avi
: type: episode
series: Neverwhere
episodeNumber: 5
title: Down Street
website: tvu.org.ru
? Neverwhere.05.Down.Street.[tvu.org.ru].avi
: type: episode
series: Neverwhere
episodeNumber: 5
title: Down Street
website: tvu.org.ru
? Series/Breaking Bad/Minisodes/Breaking.Bad.(Minisodes).01.Good.Cop.Bad.Cop.WEBRip.XviD.avi
: type: episode
series: Breaking Bad
episodeFormat: Minisode
episodeNumber: 1
title: Good Cop Bad Cop
format: WEBRip
videoCodec: XviD
? Series/Kaamelott/Kaamelott - Livre V - Ep 23 - Le Forfait.avi
: type: episode
series: Kaamelott
episodeNumber: 23
title: Le Forfait
? Movies/The Doors (1991)/09.03.08.The.Doors.(1991).BDRip.720p.AC3.X264-HiS@SiLUHD-English.[sharethefiles.com].mkv
: type: movie
title: The Doors
year: 1991
date: 2008-03-09
format: BluRay
screenSize: 720p
audioCodec: AC3
videoCodec: h264
releaseGroup: HiS@SiLUHD
language: english
website: sharethefiles.com
? Movies/M.A.S.H. (1970)/MASH.(1970).[Divx.5.02][Dual-Subtitulos][DVDRip].ogm
: type: movie
title: M.A.S.H.
year: 1970
videoCodec: DivX
format: DVD
? the.mentalist.501.hdtv-lol.mp4
: type: episode
series: The Mentalist
season: 5
episodeNumber: 1
format: HDTV
releaseGroup: LOL
? the.simpsons.2401.hdtv-lol.mp4
: type: episode
series: The Simpsons
season: 24
episodeNumber: 1
format: HDTV
releaseGroup: LOL
? Homeland.S02E01.HDTV.x264-EVOLVE.mp4
: type: episode
series: Homeland
season: 2
episodeNumber: 1
format: HDTV
videoCodec: h264
releaseGroup: EVOLVE
? /media/Band_of_Brothers-e01-Currahee.mkv
: type: episode
series: Band of Brothers
episodeNumber: 1
title: Currahee
? /media/Band_of_Brothers-x02-We_Stand_Alone_Together.mkv
: type: episode
series: Band of Brothers
bonusNumber: 2
bonusTitle: We Stand Alone Together
? /movies/James_Bond-f21-Casino_Royale-x02-Stunts.mkv
: type: movie
title: Casino Royale
filmSeries: James Bond
filmNumber: 21
bonusNumber: 2
bonusTitle: Stunts
? /TV Shows/new.girl.117.hdtv-lol.mp4
: type: episode
series: New Girl
season: 1
episodeNumber: 17
format: HDTV
releaseGroup: LOL
? The.Office.(US).1x03.Health.Care.HDTV.XviD-LOL.avi
: type: episode
series: The Office (US)
country: US
season: 1
episodeNumber: 3
title: Health Care
format: HDTV
videoCodec: XviD
releaseGroup: LOL
? The_Insider-(1999)-x02-60_Minutes_Interview-1996.mp4
: type: movie
title: The Insider
year: 1999
bonusNumber: 2
bonusTitle: 60 Minutes Interview-1996
? OSS_117--Cairo,_Nest_of_Spies.mkv
: type: movie
title: OSS 117--Cairo, Nest of Spies
? Rush.._Beyond_The_Lighted_Stage-x09-Between_Sun_and_Moon-2002_Hartford.mkv
: type: movie
title: Rush Beyond The Lighted Stage
bonusNumber: 9
bonusTitle: Between Sun and Moon-2002 Hartford
? House.Hunters.International.S56E06.720p.hdtv.x264.mp4
: type: episode
series: House Hunters International
season: 56
episodeNumber: 6
screenSize: 720p
format: HDTV
videoCodec: h264
? White.House.Down.2013.1080p.BluRay.DTS-HD.MA.5.1.x264-PublicHD.mkv
: type: movie
title: White House Down
year: 2013
screenSize: 1080p
format: BluRay
audioCodec: DTS
audioProfile: HDMA
videoCodec: h264
releaseGroup: PublicHD
audioChannels: "5.1"
? Hostages.S01E01.Pilot.for.Air.720p.WEB-DL.DD5.1.H.264-NTb.nfo
: type: episodeinfo
series: Hostages
title: Pilot for Air
season: 1
episodeNumber: 1
screenSize: 720p
format: WEB-DL
audioChannels: "5.1"
videoCodec: h264
audioCodec: DolbyDigital
releaseGroup: NTb
? Despicable.Me.2.2013.1080p.BluRay.x264-VeDeTT.nfo
: type: movieinfo
title: Despicable Me 2
year: 2013
screenSize: 1080p
format: BluRay
videoCodec: h264
releaseGroup: VeDeTT
? Le Cinquieme Commando 1971 SUBFORCED FRENCH DVDRiP XViD AC3 Bandix.mkv
: type: movie
audioCodec: AC3
format: DVD
releaseGroup: Bandix
subtitleLanguage: French
title: Le Cinquieme Commando
videoCodec: XviD
year: 1971
? Le Seigneur des Anneaux - La Communauté de l'Anneau - Version Longue - BDRip.mkv
: type: movie
format: BluRay
title: Le Seigneur des Anneaux
? La petite bande (Michel Deville - 1983) VF PAL MP4 x264 AAC.mkv
: type: movie
audioCodec: AAC
language: French
title: La petite bande
videoCodec: h264
year: 1983
? Retour de Flammes (Gregor Schnitzler 2003) FULL DVD.iso
: type: movie
format: DVD
title: Retour de Flammes
type: movie
year: 2003
? A.Common.Title.Special.2014.avi
: type: movie
year: 2014
title: A Common Title Special
? A.Common.Title.2014.Special.avi
: type: episode
year: 2014
series: A Common Title
title: Special
special: Special
? A.Common.Title.2014.Special.Edition.avi
: type: movie
year: 2014
title: A Common Title
edition: Special Edition
? Downton.Abbey.2013.Christmas.Special.HDTV.x264-FoV.mp4
: type: episode
year: 2013
series: Downton Abbey
title: Christmas Special
videoCodec: h264
releaseGroup: FoV
format: HDTV
special: Special
? Doctor_Who_2013_Christmas_Special.The_Time_of_The_Doctor.HD
: options: -n
type: episode
series: Doctor Who
other: HD
special: Special
title: Christmas Special The Time of The Doctor
year: 2013
? Doctor Who 2005 50th Anniversary Special The Day of the Doctor 3.avi
: type: episode
series: Doctor Who
special: Special
title: 50th Anniversary Special The Day of the Doctor 3
year: 2005
? Robot Chicken S06-Born Again Virgin Christmas Special HDTV x264.avi
: type: episode
series: Robot Chicken
format: HDTV
season: 6
title: Born Again Virgin Christmas Special
videoCodec: h264
special: Special
? Wicked.Tuna.S03E00.Head.To.Tail.Special.HDTV.x264-YesTV
: options: -n
type: episode
series: Wicked Tuna
title: Head To Tail Special
releaseGroup: YesTV
season: 3
episodeNumber: 0
videoCodec: h264
format: HDTV
special: Special

View file

@ -1 +0,0 @@
Just a dummy srt file (used for unittests: do not remove!)

View file

@ -1,569 +0,0 @@
# Dubious tests
#
#? "finale "
#: releaseGroup: FiNaLe
# extension: ""
? Series/Californication/Season 2/Californication.2x05.Vaginatown.HDTV.XviD-0TV.avi
: series: Californication
season: 2
episodeNumber: 5
title: Vaginatown
format: HDTV
videoCodec: XviD
releaseGroup: 0TV
? Series/dexter/Dexter.5x02.Hello,.Bandit.ENG.-.sub.FR.HDTV.XviD-AlFleNi-TeaM.[tvu.org.ru].avi
: series: Dexter
season: 5
episodeNumber: 2
title: Hello, Bandit
language: English
subtitleLanguage: French
format: HDTV
videoCodec: XviD
releaseGroup: AlFleNi-TeaM
website: tvu.org.ru
? Series/Treme/Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.avi
: series: Treme
season: 1
episodeNumber: 3
title: Right Place, Wrong Time
format: HDTV
videoCodec: XviD
releaseGroup: NoTV
? Series/Duckman/Duckman - 101 (01) - 20021107 - I, Duckman.avi
: series: Duckman
season: 1
episodeNumber: 1
title: I, Duckman
date: 2002-11-07
? Series/Duckman/Duckman - S1E13 Joking The Chicken (unedited).avi
: series: Duckman
season: 1
episodeNumber: 13
title: Joking The Chicken
? Series/Simpsons/Saison 12 Français/Simpsons,.The.12x08.A.Bas.Le.Sergent.Skinner.FR.avi
: series: The Simpsons
season: 12
episodeNumber: 8
title: A Bas Le Sergent Skinner
language: French
? Series/Futurama/Season 3 (mkv)/[™] Futurama - S03E22 - Le chef de fer à 30% ( 30 Percent Iron Chef ).mkv
: series: Futurama
season: 3
episodeNumber: 22
title: Le chef de fer à 30%
? Series/The Office/Season 6/The Office - S06xE01.avi
: series: The Office
season: 6
episodeNumber: 1
? series/The Office/Season 4/The Office [401] Fun Run.avi
: series: The Office
season: 4
episodeNumber: 1
title: Fun Run
? Series/Mad Men Season 1 Complete/Mad.Men.S01E01.avi
: series: Mad Men
season: 1
episodeNumber: 1
other: complete
? series/Psych/Psych S02 Season 2 Complete English DVD/Psych.S02E02.65.Million.Years.Off.avi
: series: Psych
season: 2
episodeNumber: 2
title: 65 Million Years Off
language: english
format: DVD
other: complete
? series/Psych/Psych S02 Season 2 Complete English DVD/Psych.S02E03.Psy.Vs.Psy.Français.srt
: series: Psych
season: 2
episodeNumber: 3
title: Psy Vs Psy
format: DVD
language: English
subtitleLanguage: French
other: complete
? Series/Pure Laine/Pure.Laine.1x01.Toutes.Couleurs.Unies.FR.(Québec).DVB-Kceb.[tvu.org.ru].avi
: series: Pure Laine
season: 1
episodeNumber: 1
title: Toutes Couleurs Unies
format: DVB
releaseGroup: Kceb
language: french
website: tvu.org.ru
? Series/Pure Laine/2x05 - Pure Laine - Je Me Souviens.avi
: series: Pure Laine
season: 2
episodeNumber: 5
title: Je Me Souviens
? Series/Tout sur moi/Tout sur moi - S02E02 - Ménage à trois (14-01-2008) [Rip by Ampli].avi
: series: Tout sur moi
season: 2
episodeNumber: 2
title: Ménage à trois
date: 2008-01-14
? The.Mentalist.2x21.18-5-4.ENG.-.sub.FR.HDTV.XviD-AlFleNi-TeaM.[tvu.org.ru].avi
: series: The Mentalist
season: 2
episodeNumber: 21
title: 18-5-4
language: english
subtitleLanguage: french
format: HDTV
videoCodec: Xvid
releaseGroup: AlFleNi-TeaM
website: tvu.org.ru
? series/__ Incomplete __/Dr Slump (Catalan)/Dr._Slump_-_003_DVB-Rip_Catalan_by_kelf.avi
: series: Dr Slump
episodeNumber: 3
format: DVB
language: catalan
? series/Ren and Stimpy - Black_hole_[DivX].avi
: series: Ren and Stimpy
title: Black hole
videoCodec: DivX
? Series/Walt Disney/Donald.Duck.-.Good.Scouts.[www.bigernie.jump.to].avi
: series: Donald Duck
title: Good Scouts
website: www.bigernie.jump.to
? Series/Neverwhere/Neverwhere.05.Down.Street.[tvu.org.ru].avi
: series: Neverwhere
episodeNumber: 5
title: Down Street
website: tvu.org.ru
? Series/South Park/Season 4/South.Park.4x07.Cherokee.Hair.Tampons.DVDRip.[tvu.org.ru].avi
: series: South Park
season: 4
episodeNumber: 7
title: Cherokee Hair Tampons
format: DVD
website: tvu.org.ru
? Series/Kaamelott/Kaamelott - Livre V - Ep 23 - Le Forfait.avi
: series: Kaamelott
episodeNumber: 23
title: Le Forfait
? Series/Duckman/Duckman - 110 (10) - 20021218 - Cellar Beware.avi
: series: Duckman
season: 1
episodeNumber: 10
date: 2002-12-18
title: Cellar Beware
? Series/Ren & Stimpy/Ren And Stimpy - Onward & Upward-Adult Party Cartoon.avi
: series: Ren And Stimpy
title: Onward & Upward-Adult Party Cartoon
? Series/Breaking Bad/Minisodes/Breaking.Bad.(Minisodes).01.Good.Cop.Bad.Cop.WEBRip.XviD.avi
: series: Breaking Bad
episodeFormat: Minisode
episodeNumber: 1
title: Good Cop Bad Cop
format: WEBRip
videoCodec: XviD
? Series/My Name Is Earl/My.Name.Is.Earl.S01Extras.-.Bad.Karma.DVDRip.XviD.avi
: series: My Name Is Earl
season: 1
title: Bad Karma
format: DVD
special: Extras
videoCodec: XviD
? /mnt/series/The Big Bang Theory/S01/The.Big.Bang.Theory.S01E01.mkv
: series: The Big Bang Theory
season: 1
episodeNumber: 1
? /media/Parks_and_Recreation-s03-e01.mkv
: series: Parks and Recreation
season: 3
episodeNumber: 1
? /media/Parks_and_Recreation-s03-e02-Flu_Season.mkv
: series: Parks and Recreation
season: 3
title: Flu Season
episodeNumber: 2
? /media/Parks_and_Recreation-s03-x01.mkv
: series: Parks and Recreation
season: 3
bonusNumber: 1
? /media/Parks_and_Recreation-s03-x02-Gag_Reel.mkv
: series: Parks and Recreation
season: 3
bonusNumber: 2
bonusTitle: Gag Reel
? /media/Band_of_Brothers-e01-Currahee.mkv
: series: Band of Brothers
episodeNumber: 1
title: Currahee
? /media/Band_of_Brothers-x02-We_Stand_Alone_Together.mkv
: series: Band of Brothers
bonusNumber: 2
bonusTitle: We Stand Alone Together
? /TV Shows/Mad.M-5x9.mkv
: series: Mad M
season: 5
episodeNumber: 9
? /TV Shows/new.girl.117.hdtv-lol.mp4
: series: New Girl
season: 1
episodeNumber: 17
format: HDTV
releaseGroup: LOL
? Kaamelott - 5x44x45x46x47x48x49x50.avi
: series: Kaamelott
season: 5
episodeNumber: 44
episodeList: [44, 45, 46, 47, 48, 49, 50]
? Example S01E01-02.avi
: series: Example
season: 1
episodeNumber: 1
episodeList: [1, 2]
? Example S01E01E02.avi
: series: Example
season: 1
episodeNumber: 1
episodeList: [1, 2]
? Series/Baccano!/Baccano!_-_T1_-_Trailer_-_[Ayu](dae8173e).mkv
: series: Baccano!
other: Trailer
? Series/Doctor Who (2005)/Season 06/Doctor Who (2005) - S06E01 - The Impossible Astronaut (1).avi
: series: Doctor Who
year: 2005
season: 6
episodeNumber: 1
title: The Impossible Astronaut
? The.Office.(US).1x03.Health.Care.HDTV.XviD-LOL.avi
: series: The Office (US)
country: US
season: 1
episodeNumber: 3
title: Health Care
format: HDTV
videoCodec: XviD
releaseGroup: LOL
? /Volumes/data-1/Series/Futurama/Season 3/Futurama_-_S03_DVD_Bonus_-_Deleted_Scenes_Part_3.ogm
: series: Futurama
season: 3
other: Bonus
title: Deleted Scenes Part 3
format: DVD
? Ben.and.Kate.S01E02.720p.HDTV.X264-DIMENSION.mkv
: series: Ben and Kate
season: 1
episodeNumber: 2
screenSize: 720p
format: HDTV
videoCodec: h264
releaseGroup: DIMENSION
? /volume1/TV Series/Drawn Together/Season 1/Drawn Together 1x04 Requiem for a Reality Show.avi
: series: Drawn Together
season: 1
episodeNumber: 4
title: Requiem for a Reality Show
? Sons.of.Anarchy.S05E06.720p.WEB.DL.DD5.1.H.264-CtrlHD.mkv
: series: Sons of Anarchy
season: 5
episodeNumber: 6
screenSize: 720p
format: WEB-DL
audioChannels: "5.1"
audioCodec: DolbyDigital
videoCodec: h264
releaseGroup: CtrlHD
? /media/bdc64bfe-e36f-4af8-b550-e6fd2dfaa507/TV_Shows/Doctor Who (2005)/Saison 6/Doctor Who (2005) - S06E13 - The Wedding of River Song.mkv
: series: Doctor Who
season: 6
episodeNumber: 13
year: 2005
title: The Wedding of River Song
idNumber: bdc64bfe-e36f-4af8-b550-e6fd2dfaa507
? /mnt/videos/tvshows/Doctor Who/Season 06/E13 - The Wedding of River Song.mkv
: series: Doctor Who
season: 6
episodeNumber: 13
title: The Wedding of River Song
? The.Simpsons.S24E03.Adventures.in.Baby-Getting.720p.WEB-DL.DD5.1.H.264-CtrlHD.mkv
: series: The Simpsons
season: 24
episodeNumber: 3
title: Adventures in Baby-Getting
screenSize: 720p
format: WEB-DL
audioChannels: "5.1"
audioCodec: DolbyDigital
videoCodec: h264
releaseGroup: CtrlHD
? /home/disaster/Videos/TV/Merlin/merlin_2008.5x02.arthurs_bane_part_two.repack.720p_hdtv_x264-fov.mkv
: series: Merlin
season: 5
episodeNumber: 2
title: Arthurs bane part two
screenSize: 720p
format: HDTV
videoCodec: h264
releaseGroup: Fov
year: 2008
other: Proper
? "Da Vinci's Demons - 1x04 - The Magician.mkv"
: series: "Da Vinci's Demons"
season: 1
episodeNumber: 4
title: The Magician
? CSI.S013E18.Sheltered.720p.WEB-DL.DD5.1.H.264.mkv
: series: CSI
season: 13
episodeNumber: 18
title: Sheltered
screenSize: 720p
format: WEB-DL
audioChannels: "5.1"
audioCodec: DolbyDigital
videoCodec: h264
? Game of Thrones S03E06 1080i HDTV DD5.1 MPEG2-TrollHD.ts
: series: Game of Thrones
season: 3
episodeNumber: 6
screenSize: 1080i
format: HDTV
audioChannels: "5.1"
audioCodec: DolbyDigital
videoCodec: MPEG2
releaseGroup: TrollHD
? gossip.girl.s01e18.hdtv.xvid-2hd.eng.srt
: series: gossip girl
season: 1
episodeNumber: 18
format: HDTV
videoCodec: XviD
releaseGroup: 2HD
subtitleLanguage: english
? Wheels.S03E01E02.720p.HDTV.x264-IMMERSE.mkv
: series: Wheels
season: 3
episodeNumber: 1
episodeList: [1, 2]
screenSize: 720p
format: HDTV
videoCodec: h264
releaseGroup: IMMERSE
? Wheels.S03E01-02.720p.HDTV.x264-IMMERSE.mkv
: series: Wheels
season: 3
episodeNumber: 1
episodeList: [1, 2]
screenSize: 720p
format: HDTV
videoCodec: h264
releaseGroup: IMMERSE
? Wheels.S03E01-E02.720p.HDTV.x264-IMMERSE.mkv
: series: Wheels
season: 3
episodeNumber: 1
episodeList: [1, 2]
screenSize: 720p
format: HDTV
videoCodec: h264
releaseGroup: IMMERSE
? Wheels.S03E01-03.720p.HDTV.x264-IMMERSE.mkv
: series: Wheels
season: 3
episodeNumber: 1
episodeList: [1, 2, 3]
screenSize: 720p
format: HDTV
videoCodec: h264
releaseGroup: IMMERSE
? Marvels.Agents.of.S.H.I.E.L.D.S01E06.720p.HDTV.X264-DIMENSION.mkv
: series: Marvels Agents of S.H.I.E.L.D.
season: 1
episodeNumber: 6
screenSize: 720p
format: HDTV
videoCodec: h264
releaseGroup: DIMENSION
? Marvels.Agents.of.S.H.I.E.L.D..S01E06.720p.HDTV.X264-DIMENSION.mkv
: series: Marvels Agents of S.H.I.E.L.D.
season: 1
episodeNumber: 6
screenSize: 720p
format: HDTV
videoCodec: h264
releaseGroup: DIMENSION
? Series/Friday Night Lights/Season 1/Friday Night Lights S01E19 - Ch-Ch-Ch-Ch-Changes.avi
: series: Friday Night Lights
season: 1
episodeNumber: 19
title: Ch-Ch-Ch-Ch-Changes
? Dexter Saison VII FRENCH.BDRip.XviD-MiND.nfo
: series: Dexter
season: 7
videoCodec: XviD
language: French
format: BluRay
releaseGroup: MiND
? Dexter Saison sept FRENCH.BDRip.XviD-MiND.nfo
: series: Dexter
season: 7
videoCodec: XviD
language: French
format: BluRay
releaseGroup: MiND
? "Pokémon S16 - E29 - 1280*720 HDTV VF.mkv"
: series: Pokémon
format: HDTV
language: French
season: 16
episodeNumber: 29
screenSize: 720p
? One.Piece.E576.VOSTFR.720p.HDTV.x264-MARINE-FORD.mkv
: episodeNumber: 576
videoCodec: h264
format: HDTV
series: One Piece
releaseGroup: MARINE-FORD
subtitleLanguage: French
screenSize: 720p
? Dexter.S08E12.FINAL.MULTi.1080p.BluRay.x264-MiND.mkv
: videoCodec: h264
episodeNumber: 12
season: 8
format: BluRay
series: Dexter
other: final
language: Multiple languages
releaseGroup: MiND
screenSize: 1080p
? One Piece - E623 VOSTFR HD [www.manga-ddl-free.com].mkv
: website: www.manga-ddl-free.com
episodeNumber: 623
subtitleLanguage: French
series: One Piece
other: HD
? Falling Skies Saison 1.HDLight.720p.x264.VFF.mkv
: language: French
screenSize: 720p
season: 1
series: Falling Skies
videoCodec: h264
? Sleepy.Hollow.S01E09.720p.WEB-DL.DD5.1.H.264-BP.mkv
: episodeNumber: 9
videoCodec: h264
format: WEB-DL
series: Sleepy Hollow
audioChannels: "5.1"
screenSize: 720p
season: 1
videoProfile: BP
audioCodec: DolbyDigital
? Sleepy.Hollow.S01E09.720p.WEB-DL.DD5.1.H.264-BS.mkv
: episodeNumber: 9
videoCodec: h264
format: WEB-DL
series: Sleepy Hollow
audioChannels: "5.1"
screenSize: 720p
season: 1
releaseGroup: BS
audioCodec: DolbyDigital
? Battlestar.Galactica.S00.Pilot.FRENCH.DVDRip.XviD-NOTAG.avi
: series: Battlestar Galactica
season: 0
title: Pilot
special: Pilot
language: French
format: DVD
videoCodec: XviD
releaseGroup: NOTAG
? The Big Bang Theory S00E00 Unaired Pilot VOSTFR TVRip XviD-VioCs
: options: -n
series: The Big Bang Theory
season: 0
episodeNumber: 0
subtitleLanguage: French
format: TV
videoCodec: XviD
releaseGroup: VioCs
special: [Unaired, Pilot]
title: Unaired Pilot
? The Big Bang Theory S01E00 PROPER Unaired Pilot TVRip XviD-GIGGITY
: options: -n
series: The Big Bang Theory
season: 1
episodeNumber: 0
format: TV
videoCodec: XviD
releaseGroup: GIGGITY
other: proper
special: [Unaired, Pilot]
title: Unaired Pilot

View file

@ -1,168 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit import base_text_type, u
from unittest import TestCase, TestLoader, TextTestRunner
import shlex
import yaml, logging, sys, os
from os.path import *
def currentPath():
    '''Return the directory of the file whose code called this function.

    The caller's ``__file__`` is read from the globals of the calling frame
    and joined onto the current working directory before its directory name
    is taken.
    '''
    working_dir = os.getcwd()
    caller_globals = sys._getframe(1).f_globals
    caller_file = caller_globals['__file__']
    return dirname(join(working_dir, caller_file))
def addImportPath(path):
    '''Prepend the given path to the module search path (``sys.path``).

    The path can be absolute, or relative to the directory of the file that
    calls this function.

    The caller's directory is resolved here, from the caller's own frame.
    Delegating to ``currentPath()`` would make that helper inspect the frame
    of *this* function, so relative paths would always be resolved against
    the module defining ``addImportPath`` instead of the calling file.
    '''
    caller_file = sys._getframe(1).f_globals['__file__']
    caller_dir = dirname(join(os.getcwd(), caller_file))
    importPath = abspath(join(caller_dir, path))
    # Insert in place (rather than rebinding sys.path to a new list) so that
    # every existing reference to the list object sees the new entry.
    sys.path.insert(0, importPath)
# Module-level logger for the test helpers defined in this file.
log = logging.getLogger(__name__)
from guessit.plugins import transformers
import guessit
from guessit.options import option_parser
from guessit import *
from guessit.matcher import *
from guessit.fileutils import *
def allTests(testClass):
    '''Build a test suite from every test method found on ``testClass``.'''
    loader = TestLoader()
    suite = loader.loadTestsFromTestCase(testClass)
    return suite
class TestGuessit(TestCase):
    '''Base test case comparing guessed file information with YAML fixtures.'''

    def checkMinimumFieldsCorrect(self, filename, filetype=None, remove_type=True,
                                  exclude_files=None):
        '''Check the guesses for every entry of a fixture file.

        filename: name of the YAML fixture, loaded with
            ``load_file_in_same_dir`` relative to this module.
        filetype: forwarded to ``guess_file_info`` as its ``type`` argument.
        remove_type, exclude_files: forwarded unchanged to ``checkFields``.
        '''
        # NOTE(review): yaml.load() without an explicit Loader can construct
        # arbitrary objects and is rejected by recent PyYAML releases. If the
        # fixtures only hold plain scalars/lists/mappings, yaml.safe_load()
        # would be the safer call -- confirm before switching.
        groundTruth = yaml.load(load_file_in_same_dir(__file__, filename))

        def guess_func(string, options=None):
            return guess_file_info(string, options=options, type=filetype)

        return self.checkFields(groundTruth, guess_func, remove_type, exclude_files)

    def checkFields(self, groundTruth, guess_func, remove_type=True,
                    exclude_files=None):
        '''Compare the results of ``guess_func`` with the expected properties.

        groundTruth: mapping of filename -> mapping of expected properties.
            An optional ``options`` entry holds a command-line style option
            string; it is popped from the mapping, parsed with
            ``option_parser`` and handed to ``guess_func`` as a dict.
        guess_func: callable ``(filename, options)`` returning the guessed
            properties as a dict-like object.
        remove_type: when true, drop the ``type`` key from each guess before
            comparing.
        exclude_files: filenames to skip entirely.

        The test fails (through ``assertTrue``) when at least one filename has
        a missing or mismatching expected property. Properties that were
        guessed but are not part of the expectations are only logged as
        warnings.
        '''
        total = 0
        exclude_files = exclude_files or []

        fails = {}
        additionals = {}

        for filename, required_fields in groundTruth.items():
            filename = u(filename)
            if filename in exclude_files:
                continue

            log.debug('\n' + '-' * 120)
            log.info('Guessing information for file: %s' % filename)

            options = required_fields.pop('options', None)
            if options:
                args = shlex.split(options)
                options, _ = option_parser.parse_args(args)
                options = vars(options)
            found = guess_func(filename, options)

            total += 1

            # no need for these in the unittests
            if remove_type:
                try:
                    del found['type']
                except KeyError:
                    # the guess carried no type; any other error is a real
                    # problem and must not be silently swallowed
                    pass
            for prop in ('container', 'mimetype'):
                if prop in found:
                    del found[prop]

            # props which are lists of just 1 element are unwrapped so the
            # expectations can be written as plain scalars
            for prop in ('language', 'subtitleLanguage', 'other', 'special'):
                value = found.get(prop, None)
                if isinstance(value, list) and len(value) == 1:
                    found[prop] = value[0]

            # look for missing properties
            for prop, value in required_fields.items():
                if prop not in found:
                    log.debug("Prop '%s' not found in: %s" % (prop, filename))
                    fails.setdefault(filename, []).append(
                        "'%s' not found in: %s" % (prop, filename))
                    continue

                # if both properties are strings, do a case-insensitive comparison
                if (isinstance(value, base_text_type) and
                        isinstance(found[prop], base_text_type)):
                    if value.lower() != found[prop].lower():
                        log.debug("Wrong prop value [str] for '%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
                        fails.setdefault(filename, []).append(
                            "'%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))

                # if both are lists, we assume list of strings and do a case-insensitive
                # comparison on their elements
                elif isinstance(value, list) and isinstance(found[prop], list):
                    s1 = set(u(s).lower() for s in value)
                    s2 = set(u(s).lower() for s in found[prop])
                    if s1 != s2:
                        log.debug("Wrong prop value [list] for '%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
                        fails.setdefault(filename, []).append(
                            "'%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))

                # otherwise, just compare their values directly
                elif found[prop] != value:
                    log.debug("Wrong prop value for '%s': expected = '%s' [%s] - received = '%s' [%s]" % (prop, u(value), type(value), u(found[prop]), type(found[prop])))
                    fails.setdefault(filename, []).append(
                        "'%s': expected = '%s' [%s] - received = '%s' [%s]" % (prop, u(value), type(value), u(found[prop]), type(found[prop])))

            # look for additional properties
            for prop, value in found.items():
                if prop not in required_fields:
                    log.debug("Found additional info for prop = '%s': '%s'" % (prop, u(value)))
                    additionals.setdefault(filename, []).append(
                        "'%s': '%s'" % (prop, u(value)))

        # a filename counts as failed as soon as one of its properties failed
        correct = total - len(fails)
        log.info('SUMMARY: Guessed correctly %d out of %d filenames' % (correct, total))

        for failed_entry, failed_properties in fails.items():
            log.error('---- ' + failed_entry + ' ----')
            for failed_property in failed_properties:
                log.error("FAILED: " + failed_property)

        for additional_entry, additional_properties in additionals.items():
            log.warning('---- ' + additional_entry + ' ----')
            for additional_property in additional_properties:
                log.warning("ADDITIONAL: " + additional_property)

        self.assertTrue(correct == total,
                        msg='Correct: %d < Total: %d' % (correct, total))

View file

@ -1,626 +0,0 @@
? Movies/Fear and Loathing in Las Vegas (1998)/Fear.and.Loathing.in.Las.Vegas.720p.HDDVD.DTS.x264-ESiR.mkv
: title: Fear and Loathing in Las Vegas
year: 1998
screenSize: 720p
format: HD-DVD
audioCodec: DTS
videoCodec: h264
releaseGroup: ESiR
? Movies/El Dia de la Bestia (1995)/El.dia.de.la.bestia.DVDrip.Spanish.DivX.by.Artik[SEDG].avi
: title: El Dia de la Bestia
year: 1995
format: DVD
language: spanish
videoCodec: DivX
? Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv
: title: Dark City
year: 1998
format: BluRay
screenSize: 720p
audioCodec: DTS
videoCodec: h264
releaseGroup: CHD
? Movies/Sin City (BluRay) (2005)/Sin.City.2005.BDRip.720p.x264.AC3-SEPTiC.mkv
: title: Sin City
year: 2005
format: BluRay
screenSize: 720p
videoCodec: h264
audioCodec: AC3
releaseGroup: SEPTiC
? Movies/Borat (2006)/Borat.(2006).R5.PROPER.REPACK.DVDRip.XviD-PUKKA.avi
: title: Borat
year: 2006
format: DVD
other: [ R5, Proper ]
videoCodec: XviD
releaseGroup: PUKKA
? "[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv"
: title: Le Prestige
format: DVD
videoCodec: h264
videoProfile: HP
audioCodec: AAC
audioProfile: HE
language: [ french, english ]
subtitleLanguage: [ french, english ]
? Battle Royale (2000)/Battle.Royale.(Batoru.Rowaiaru).(2000).(Special.Edition).CD1of2.DVDRiP.XviD-[ZeaL].avi
: title: Battle Royale
year: 2000
edition: special edition
cdNumber: 1
cdNumberTotal: 2
format: DVD
videoCodec: XviD
releaseGroup: ZeaL
? Movies/Brazil (1985)/Brazil_Criterion_Edition_(1985).CD2.avi
: title: Brazil
edition: Criterion Edition
year: 1985
cdNumber: 2
? Movies/Persepolis (2007)/[XCT] Persepolis [H264+Aac-128(Fr-Eng)+ST(Fr-Eng)+Ind].mkv
: title: Persepolis
year: 2007
videoCodec: h264
audioCodec: AAC
language: [ French, English ]
subtitleLanguage: [ French, English ]
? Movies/Toy Story (1995)/Toy Story [HDTV 720p English-Spanish].mkv
: title: Toy Story
year: 1995
format: HDTV
screenSize: 720p
language: [ english, spanish ]
? Movies/Office Space (1999)/Office.Space.[Dual-DVDRip].[Spanish-English].[XviD-AC3-AC3].[by.Oswald].avi
: title: Office Space
year: 1999
format: DVD
language: [ english, spanish ]
videoCodec: XviD
audioCodec: AC3
? Movies/Wild Zero (2000)/Wild.Zero.DVDivX-EPiC.avi
: title: Wild Zero
year: 2000
videoCodec: DivX
releaseGroup: EPiC
? movies/Baraka_Edition_Collector.avi
: title: Baraka
edition: collector edition
? Movies/Blade Runner (1982)/Blade.Runner.(1982).(Director's.Cut).CD1.DVDRip.XviD.AC3-WAF.avi
: title: Blade Runner
year: 1982
edition: Director's Cut
cdNumber: 1
format: DVD
videoCodec: XviD
audioCodec: AC3
releaseGroup: WAF
? movies/American.The.Bill.Hicks.Story.2009.DVDRip.XviD-EPiSODE.[UsaBit.com]/UsaBit.com_esd-americanbh.avi
: title: American The Bill Hicks Story
year: 2009
format: DVD
videoCodec: XviD
releaseGroup: EPiSODE
website: UsaBit.com
? movies/Charlie.And.Boots.DVDRip.XviD-TheWretched/wthd-cab.avi
: title: Charlie And Boots
format: DVD
videoCodec: XviD
releaseGroup: TheWretched
? movies/Steig Larsson Millenium Trilogy (2009) BRrip 720 AAC x264/(1)The Girl With The Dragon Tattoo (2009) BRrip 720 AAC x264.mkv
: title: The Girl With The Dragon Tattoo
filmSeries: Steig Larsson Millenium Trilogy
filmNumber: 1
year: 2009
format: BluRay
audioCodec: AAC
videoCodec: h264
screenSize: 720p
? movies/Greenberg.REPACK.LiMiTED.DVDRip.XviD-ARROW/arw-repack-greenberg.dvdrip.xvid.avi
: title: Greenberg
format: DVD
videoCodec: XviD
releaseGroup: ARROW
other: ['Proper', 'Limited']
? Movies/Fr - Paris 2054, Renaissance (2005) - De Christian Volckman - (Film Divx Science Fiction Fantastique Thriller Policier N&B).avi
: title: Paris 2054, Renaissance
year: 2005
language: french
videoCodec: DivX
? Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi
: title: Avida
year: 2006
language: french
format: DVD
videoCodec: XviD
releaseGroup: PROD
? Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi
: title: Alice in Wonderland
format: DVD
videoCodec: XviD
releaseGroup: DiAMOND
? Movies/Ne.Le.Dis.A.Personne.Fr 2 cd/personnea_mp.avi
: title: Ne Le Dis A Personne
language: french
cdNumberTotal: 2
? Movies/Bunker Palace Hôtel (Enki Bilal) (1989)/Enki Bilal - Bunker Palace Hotel (Fr Vhs Rip).avi
: title: Bunker Palace Hôtel
year: 1989
language: french
format: VHS
? Movies/21 (2008)/21.(2008).DVDRip.x264.AC3-FtS.[sharethefiles.com].mkv
: title: "21"
year: 2008
format: DVD
videoCodec: h264
audioCodec: AC3
releaseGroup: FtS
website: sharethefiles.com
? Movies/9 (2009)/9.2009.Blu-ray.DTS.720p.x264.HDBRiSe.[sharethefiles.com].mkv
: title: "9"
year: 2009
format: BluRay
audioCodec: DTS
screenSize: 720p
videoCodec: h264
releaseGroup: HDBRiSe
website: sharethefiles.com
? Movies/Mamma.Mia.2008.DVDRip.AC3.XviD-CrazyTeam/Mamma.Mia.2008.DVDRip.AC3.XviD-CrazyTeam.avi
: title: Mamma Mia
year: 2008
format: DVD
audioCodec: AC3
videoCodec: XviD
releaseGroup: CrazyTeam
? Movies/M.A.S.H. (1970)/MASH.(1970).[Divx.5.02][Dual-Subtitulos][DVDRip].ogm
: title: M.A.S.H.
year: 1970
videoCodec: DivX
format: DVD
? Movies/The Doors (1991)/09.03.08.The.Doors.(1991).BDRip.720p.AC3.X264-HiS@SiLUHD-English.[sharethefiles.com].mkv
: title: The Doors
year: 1991
date: 2008-03-09
format: BluRay
screenSize: 720p
audioCodec: AC3
videoCodec: h264
releaseGroup: HiS@SiLUHD
language: english
website: sharethefiles.com
? Movies/Ratatouille/video_ts-ratatouille.srt
: title: Ratatouille
format: DVD
? Movies/001 __ A classer/Fantomas se déchaine - Louis de Funès.avi
: title: Fantomas se déchaine
? Movies/Comme une Image (2004)/Comme.Une.Image.FRENCH.DVDRiP.XViD-NTK.par-www.divx-overnet.com.avi
: title: Comme une Image
year: 2004
language: french
format: DVD
videoCodec: XviD
releaseGroup: NTK
website: www.divx-overnet.com
? Movies/Fantastic Mr Fox/Fantastic.Mr.Fox.2009.DVDRip.{x264+LC-AAC.5.1}{Fr-Eng}{Sub.Fr-Eng}-™.[sharethefiles.com].mkv
: title: Fantastic Mr Fox
year: 2009
format: DVD
videoCodec: h264
audioCodec: AAC
audioProfile: LC
audioChannels: "5.1"
language: [ french, english ]
subtitleLanguage: [ french, english ]
website: sharethefiles.com
? Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi
: title: Somewhere
year: 2010
format: DVD
videoCodec: XviD
releaseGroup: iLG
? Movies/Moon_(2009).mkv
: title: Moon
year: 2009
? Movies/Moon_(2009)-x01.mkv
: title: Moon
year: 2009
bonusNumber: 1
? Movies/Moon_(2009)-x02-Making_Of.mkv
: title: Moon
year: 2009
bonusNumber: 2
bonusTitle: Making Of
? movies/James_Bond-f17-Goldeneye.mkv
: title: Goldeneye
filmSeries: James Bond
filmNumber: 17
? /movies/James_Bond-f21-Casino_Royale.mkv
: title: Casino Royale
filmSeries: James Bond
filmNumber: 21
? /movies/James_Bond-f21-Casino_Royale-x01-Becoming_Bond.mkv
: title: Casino Royale
filmSeries: James Bond
filmNumber: 21
bonusNumber: 1
bonusTitle: Becoming Bond
? /movies/James_Bond-f21-Casino_Royale-x02-Stunts.mkv
: title: Casino Royale
filmSeries: James Bond
filmNumber: 21
bonusNumber: 2
bonusTitle: Stunts
? OSS_117--Cairo,_Nest_of_Spies.mkv
: title: OSS 117--Cairo, Nest of Spies
? The Godfather Part III.mkv
: title: The Godfather Part III
? Foobar Part VI.mkv
: title: Foobar Part VI
? The_Insider-(1999)-x02-60_Minutes_Interview-1996.mp4
: title: The Insider
year: 1999
bonusNumber: 2
bonusTitle: 60 Minutes Interview-1996
? Rush.._Beyond_The_Lighted_Stage-x09-Between_Sun_and_Moon-2002_Hartford.mkv
: title: Rush Beyond The Lighted Stage
bonusNumber: 9
bonusTitle: Between Sun and Moon-2002 Hartford
? /public/uTorrent/Downloads Finished/Movies/Indiana.Jones.and.the.Temple.of.Doom.1984.HDTV.720p.x264.AC3.5.1-REDµX/Indiana.Jones.and.the.Temple.of.Doom.1984.HDTV.720p.x264.AC3.5.1-REDµX.mkv
: title: Indiana Jones and the Temple of Doom
year: 1984
format: HDTV
screenSize: 720p
videoCodec: h264
audioCodec: AC3
audioChannels: "5.1"
releaseGroup: REDµX
? The.Directors.Notebook.2006.Blu-Ray.x264.DXVA.720p.AC3-de[42].mkv
: title: The Directors Notebook
year: 2006
format: BluRay
videoCodec: h264
videoApi: DXVA
screenSize: 720p
audioCodec: AC3
releaseGroup: de[42]
? Movies/Cosmopolis.2012.LiMiTED.720p.BluRay.x264-AN0NYM0US[bb]/ano-cosmo.720p.mkv
: title: Cosmopolis
year: 2012
screenSize: 720p
videoCodec: h264
releaseGroup: AN0NYM0US[bb]
format: BluRay
other: LIMITED
? movies/La Science des Rêves (2006)/La.Science.Des.Reves.FRENCH.DVDRip.XviD-MP-AceBot.avi
: title: La Science des Rêves
year: 2006
format: DVD
videoCodec: XviD
videoProfile: MP
releaseGroup: AceBot
language: French
? The_Italian_Job.mkv
: title: The Italian Job
? The.Rum.Diary.2011.1080p.BluRay.DTS.x264.D-Z0N3.mkv
: title: The Rum Diary
year: 2011
screenSize: 1080p
format: BluRay
videoCodec: h264
audioCodec: DTS
releaseGroup: D-Z0N3
? Life.Of.Pi.2012.1080p.BluRay.DTS.x264.D-Z0N3.mkv
: title: Life Of Pi
year: 2012
screenSize: 1080p
format: BluRay
videoCodec: h264
audioCodec: DTS
releaseGroup: D-Z0N3
? The.Kings.Speech.2010.1080p.BluRay.DTS.x264.D Z0N3.mkv
: title: The Kings Speech
year: 2010
screenSize: 1080p
format: BluRay
audioCodec: DTS
videoCodec: h264
releaseGroup: D-Z0N3
? Street.Kings.2008.BluRay.1080p.DTS.x264.dxva EuReKA.mkv
: title: Street Kings
year: 2008
format: BluRay
screenSize: 1080p
audioCodec: DTS
videoCodec: h264
videoApi: DXVA
releaseGroup: EuReKa
? 2001.A.Space.Odyssey.1968.HDDVD.1080p.DTS.x264.dxva EuReKA.mkv
: title: 2001 A Space Odyssey
year: 1968
format: HD-DVD
screenSize: 1080p
audioCodec: DTS
videoCodec: h264
videoApi: DXVA
releaseGroup: EuReKa
? 2012.2009.720p.BluRay.x264.DTS WiKi.mkv
: title: "2012"
year: 2009
screenSize: 720p
format: BluRay
videoCodec: h264
audioCodec: DTS
releaseGroup: WiKi
? /share/Download/movie/Dead Man Down (2013) BRRiP XViD DD5_1 Custom NLSubs =-_lt Q_o_Q gt-=_/XD607ebb-BRc59935-5155473f-1c5f49/XD607ebb-BRc59935-5155473f-1c5f49.avi
: title: Dead Man Down
year: 2013
format: BluRay
videoCodec: XviD
audioChannels: "5.1"
audioCodec: DolbyDigital
idNumber: XD607ebb-BRc59935-5155473f-1c5f49
? Pacific.Rim.3D.2013.COMPLETE.BLURAY-PCH.avi
: title: Pacific Rim
year: 2013
format: BluRay
other:
- complete
- 3D
releaseGroup: PCH
? Immersion.French.2011.STV.READNFO.QC.FRENCH.ENGLISH.NTSC.DVDR.nfo
: title: Immersion French
year: 2011
language:
- French
- English
? Immersion.French.2011.STV.READNFO.QC.FRENCH.NTSC.DVDR.nfo
: title: Immersion French
year: 2011
language: French
? Immersion.French.2011.STV.READNFO.QC.NTSC.DVDR.nfo
: title: Immersion French
year: 2011
? French.Immersion.2011.STV.READNFO.QC.ENGLISH.NTSC.DVDR.nfo
: title: French Immersion
year: 2011
language: ENGLISH
? Howl's_Moving_Castle_(2004)_[720p,HDTV,x264,DTS]-FlexGet.avi
: videoCodec: h264
format: HDTV
title: Howl's Moving Castle
screenSize: 720p
year: 2004
audioCodec: DTS
releaseGroup: FlexGet
? Pirates de langkasuka.2008.FRENCH.1920X1080.h264.AVC.AsiaRa.mkv
: screenSize: 1080p
year: 2008
language: French
videoCodec: h264
title: Pirates de langkasuka
releaseGroup: AsiaRa
? Masala (2013) Telugu Movie HD DVDScr XviD - Exclusive.avi
: year: 2013
videoCodec: XviD
title: Masala
format: HD-DVD
other: screener
language: Telugu
releaseGroup: Exclusive
? Django Unchained 2012 DVDSCR X264 AAC-P2P.nfo
: year: 2012
other: screener
videoCodec: h264
title: Django Unchained
audioCodec: AAC
format: DVD
releaseGroup: P2P
? Ejecutiva.En.Apuros(2009).BLURAY.SCR.Xvid.Spanish.LanzamientosD.nfo
: year: 2009
other: screener
format: BluRay
videoCodec: XviD
language: Spanish
title: Ejecutiva En Apuros
? Die.Schluempfe.2.German.DL.1080p.BluRay.x264-EXQUiSiTE.mkv
: title: Die Schluempfe 2
format: BluRay
language:
- Multiple languages
- German
videoCodec: h264
releaseGroup: EXQUiSiTE
screenSize: 1080p
? Rocky 1976 French SubForced BRRip x264 AC3-FUNKY.mkv
: title: Rocky
year: 1976
subtitleLanguage: French
format: BluRay
videoCodec: h264
audioCodec: AC3
releaseGroup: FUNKY
? REDLINE (BD 1080p H264 10bit FLAC) [3xR].mkv
: title: REDLINE
format: BluRay
videoCodec: h264
videoProfile: 10bit
audioCodec: Flac
screenSize: 1080p
? The.Lizzie.McGuire.Movie.(2003).HR.DVDRiP.avi
: title: The Lizzie McGuire Movie
year: 2003
screenSize: 480p
format: DVD
? Hua.Mulan.BRRIP.MP4.x264.720p-HR.avi
: title: Hua Mulan
videoCodec: h264
format: BluRay
screenSize: 720p
? Dr.Seuss.The.Lorax.2012.DVDRip.LiNE.XviD.AC3.HQ.Hive-CM8.mp4
: videoCodec: XviD
title: Dr Seuss The Lorax
format: DVD
other: LiNE
year: 2012
audioCodec: AC3
audioProfile: HQ
releaseGroup: Hive-CM8
? "Star Wars: Episode IV - A New Hope (2004) Special Edition.MKV"
: title: Star Wars Episode IV
year: 2004
edition: Special Edition
? Dr.LiNE.The.Lorax.2012.DVDRip.LiNE.XviD.AC3.HQ.Hive-CM8.mp4
: videoCodec: XviD
title: Dr LiNE The Lorax
format: DVD
other: LiNE
year: 2012
audioCodec: AC3
audioProfile: HQ
releaseGroup: Hive-CM8
? Perfect Child-2007-TRUEFRENCH-TVRip.Xvid-h@mster.avi
: releaseGroup: h@mster
title: Perfect Child
videoCodec: XviD
language: French
format: TV
year: 2007
? entre.ciel.et.terre.(1994).dvdrip.h264.aac-psypeon.avi
: audioCodec: AAC
format: DVD
releaseGroup: psypeon
title: entre ciel et terre
videoCodec: h264
year: 1994
? Yves.Saint.Laurent.2013.FRENCH.DVDSCR.MD.XviD-ViVARiUM.avi
: format: DVD
language: French
other: Screener
releaseGroup: ViVARiUM
title: Yves Saint Laurent
videoCodec: XviD
year: 2013
? Echec et Mort - Hard to Kill - Steven Seagal Multi 1080p BluRay x264 CCATS.avi
: format: BluRay
language: Multiple languages
releaseGroup: CCATS
screenSize: 1080p
title: Echec et Mort
videoCodec: h264
? Paparazzi - Timsit/Lindon (MKV 1080p tvripHD)
: options: -n
title: Paparazzi
screenSize: 1080p
format: HDTV
? some.movie.720p.bluray.x264-mind
: options: -n
title: some movie
screenSize: 720p
videoCodec: h264
releaseGroup: mind
format: BluRay
? Dr LiNE The Lorax 720p h264 BluRay
: options: -n
title: Dr LiNE The Lorax
screenSize: 720p
videoCodec: h264
format: BluRay
? BeatdownFrenchDVDRip.mkv
: title: Beatdown
language: French
format: DVD
? YvesSaintLaurent2013FrenchDVDScrXvid.avi
: format: DVD
language: French
other: Screener
title: Yves saint laurent
videoCodec: XviD
year: 2013

View file

@ -1,473 +0,0 @@
IdSubLanguage ISO639 LanguageName UploadEnabled WebEnabled
aar aa Afar, afar 0 0
abk ab Abkhazian 0 0
ace Achinese 0 0
ach Acoli 0 0
ada Adangme 0 0
ady adyghé 0 0
afa Afro-Asiatic (Other) 0 0
afh Afrihili 0 0
afr af Afrikaans 0 0
ain Ainu 0 0
aka ak Akan 0 0
akk Akkadian 0 0
alb sq Albanian 1 1
ale Aleut 0 0
alg Algonquian languages 0 0
alt Southern Altai 0 0
amh am Amharic 0 0
ang English, Old (ca.450-1100) 0 0
apa Apache languages 0 0
ara ar Arabic 1 1
arc Aramaic 0 0
arg an Aragonese 0 0
arm hy Armenian 1 0
arn Araucanian 0 0
arp Arapaho 0 0
art Artificial (Other) 0 0
arw Arawak 0 0
asm as Assamese 0 0
ast Asturian, Bable 0 0
ath Athapascan languages 0 0
aus Australian languages 0 0
ava av Avaric 0 0
ave ae Avestan 0 0
awa Awadhi 0 0
aym ay Aymara 0 0
aze az Azerbaijani 0 0
bad Banda 0 0
bai Bamileke languages 0 0
bak ba Bashkir 0 0
bal Baluchi 0 0
bam bm Bambara 0 0
ban Balinese 0 0
baq eu Basque 1 1
bas Basa 0 0
bat Baltic (Other) 0 0
bej Beja 0 0
bel be Belarusian 0 0
bem Bemba 0 0
ben bn Bengali 1 0
ber Berber (Other) 0 0
bho Bhojpuri 0 0
bih bh Bihari 0 0
bik Bikol 0 0
bin Bini 0 0
bis bi Bislama 0 0
bla Siksika 0 0
bnt Bantu (Other) 0 0
bos bs Bosnian 1 0
bra Braj 0 0
bre br Breton 1 0
btk Batak (Indonesia) 0 0
bua Buriat 0 0
bug Buginese 0 0
bul bg Bulgarian 1 1
bur my Burmese 0 0
byn Blin 0 0
cad Caddo 0 0
cai Central American Indian (Other) 0 0
car Carib 0 0
cat ca Catalan 1 1
cau Caucasian (Other) 0 0
ceb Cebuano 0 0
cel Celtic (Other) 0 0
cha ch Chamorro 0 0
chb Chibcha 0 0
che ce Chechen 0 0
chg Chagatai 0 0
chi zh Chinese 1 1
chk Chuukese 0 0
chm Mari 0 0
chn Chinook jargon 0 0
cho Choctaw 0 0
chp Chipewyan 0 0
chr Cherokee 0 0
chu cu Church Slavic 0 0
chv cv Chuvash 0 0
chy Cheyenne 0 0
cmc Chamic languages 0 0
cop Coptic 0 0
cor kw Cornish 0 0
cos co Corsican 0 0
cpe Creoles and pidgins, English based (Other) 0 0
cpf Creoles and pidgins, French-based (Other) 0 0
cpp Creoles and pidgins, Portuguese-based (Other) 0 0
cre cr Cree 0 0
crh Crimean Tatar 0 0
crp Creoles and pidgins (Other) 0 0
csb Kashubian 0 0
cus Cushitic (Other)' couchitiques, autres langues 0 0
cze cs Czech 1 1
dak Dakota 0 0
dan da Danish 1 1
dar Dargwa 0 0
day Dayak 0 0
del Delaware 0 0
den Slave (Athapascan) 0 0
dgr Dogrib 0 0
din Dinka 0 0
div dv Divehi 0 0
doi Dogri 0 0
dra Dravidian (Other) 0 0
dua Duala 0 0
dum Dutch, Middle (ca.1050-1350) 0 0
dut nl Dutch 1 1
dyu Dyula 0 0
dzo dz Dzongkha 0 0
efi Efik 0 0
egy Egyptian (Ancient) 0 0
eka Ekajuk 0 0
elx Elamite 0 0
eng en English 1 1
enm English, Middle (1100-1500) 0 0
epo eo Esperanto 1 0
est et Estonian 1 1
ewe ee Ewe 0 0
ewo Ewondo 0 0
fan Fang 0 0
fao fo Faroese 0 0
fat Fanti 0 0
fij fj Fijian 0 0
fil Filipino 0 0
fin fi Finnish 1 1
fiu Finno-Ugrian (Other) 0 0
fon Fon 0 0
fre fr French 1 1
frm French, Middle (ca.1400-1600) 0 0
fro French, Old (842-ca.1400) 0 0
fry fy Frisian 0 0
ful ff Fulah 0 0
fur Friulian 0 0
gaa Ga 0 0
gay Gayo 0 0
gba Gbaya 0 0
gem Germanic (Other) 0 0
geo ka Georgian 1 1
ger de German 1 1
gez Geez 0 0
gil Gilbertese 0 0
gla gd Gaelic 0 0
gle ga Irish 0 0
glg gl Galician 1 1
glv gv Manx 0 0
gmh German, Middle High (ca.1050-1500) 0 0
goh German, Old High (ca.750-1050) 0 0
gon Gondi 0 0
gor Gorontalo 0 0
got Gothic 0 0
grb Grebo 0 0
grc Greek, Ancient (to 1453) 0 0
ell el Greek 1 1
grn gn Guarani 0 0
guj gu Gujarati 0 0
gwi Gwich´in 0 0
hai Haida 0 0
hat ht Haitian 0 0
hau ha Hausa 0 0
haw Hawaiian 0 0
heb he Hebrew 1 1
her hz Herero 0 0
hil Hiligaynon 0 0
him Himachali 0 0
hin hi Hindi 1 1
hit Hittite 0 0
hmn Hmong 0 0
hmo ho Hiri Motu 0 0
hrv hr Croatian 1 1
hun hu Hungarian 1 1
hup Hupa 0 0
iba Iban 0 0
ibo ig Igbo 0 0
ice is Icelandic 1 1
ido io Ido 0 0
iii ii Sichuan Yi 0 0
ijo Ijo 0 0
iku iu Inuktitut 0 0
ile ie Interlingue 0 0
ilo Iloko 0 0
ina ia Interlingua (International Auxiliary Language Asso 0 0
inc Indic (Other) 0 0
ind id Indonesian 1 1
ine Indo-European (Other) 0 0
inh Ingush 0 0
ipk ik Inupiaq 0 0
ira Iranian (Other) 0 0
iro Iroquoian languages 0 0
ita it Italian 1 1
jav jv Javanese 0 0
jpn ja Japanese 1 1
jpr Judeo-Persian 0 0
jrb Judeo-Arabic 0 0
kaa Kara-Kalpak 0 0
kab Kabyle 0 0
kac Kachin 0 0
kal kl Kalaallisut 0 0
kam Kamba 0 0
kan kn Kannada 0 0
kar Karen 0 0
kas ks Kashmiri 0 0
kau kr Kanuri 0 0
kaw Kawi 0 0
kaz kk Kazakh 1 0
kbd Kabardian 0 0
kha Khasi 0 0
khi Khoisan (Other) 0 0
khm km Khmer 1 1
kho Khotanese 0 0
kik ki Kikuyu 0 0
kin rw Kinyarwanda 0 0
kir ky Kirghiz 0 0
kmb Kimbundu 0 0
kok Konkani 0 0
kom kv Komi 0 0
kon kg Kongo 0 0
kor ko Korean 1 1
kos Kosraean 0 0
kpe Kpelle 0 0
krc Karachay-Balkar 0 0
kro Kru 0 0
kru Kurukh 0 0
kua kj Kuanyama 0 0
kum Kumyk 0 0
kur ku Kurdish 0 0
kut Kutenai 0 0
lad Ladino 0 0
lah Lahnda 0 0
lam Lamba 0 0
lao lo Lao 0 0
lat la Latin 0 0
lav lv Latvian 1 0
lez Lezghian 0 0
lim li Limburgan 0 0
lin ln Lingala 0 0
lit lt Lithuanian 1 0
lol Mongo 0 0
loz Lozi 0 0
ltz lb Luxembourgish 1 0
lua Luba-Lulua 0 0
lub lu Luba-Katanga 0 0
lug lg Ganda 0 0
lui Luiseno 0 0
lun Lunda 0 0
luo Luo (Kenya and Tanzania) 0 0
lus lushai 0 0
mac mk Macedonian 1 1
mad Madurese 0 0
mag Magahi 0 0
mah mh Marshallese 0 0
mai Maithili 0 0
mak Makasar 0 0
mal ml Malayalam 0 0
man Mandingo 0 0
mao mi Maori 0 0
map Austronesian (Other) 0 0
mar mr Marathi 0 0
mas Masai 0 0
may ms Malay 1 1
mdf Moksha 0 0
mdr Mandar 0 0
men Mende 0 0
mga Irish, Middle (900-1200) 0 0
mic Mi'kmaq 0 0
min Minangkabau 0 0
mis Miscellaneous languages 0 0
mkh Mon-Khmer (Other) 0 0
mlg mg Malagasy 0 0
mlt mt Maltese 0 0
mnc Manchu 0 0
mni Manipuri 0 0
mno Manobo languages 0 0
moh Mohawk 0 0
mol mo Moldavian 0 0
mon mn Mongolian 1 0
mos Mossi 0 0
mwl Mirandese 0 0
mul Multiple languages 0 0
mun Munda languages 0 0
mus Creek 0 0
mwr Marwari 0 0
myn Mayan languages 0 0
myv Erzya 0 0
nah Nahuatl 0 0
nai North American Indian 0 0
nap Neapolitan 0 0
nau na Nauru 0 0
nav nv Navajo 0 0
nbl nr Ndebele, South 0 0
nde nd Ndebele, North 0 0
ndo ng Ndonga 0 0
nds Low German 0 0
nep ne Nepali 0 0
new Nepal Bhasa 0 0
nia Nias 0 0
nic Niger-Kordofanian (Other) 0 0
niu Niuean 0 0
nno nn Norwegian Nynorsk 0 0
nob nb Norwegian Bokmal 0 0
nog Nogai 0 0
non Norse, Old 0 0
nor no Norwegian 1 1
nso Northern Sotho 0 0
nub Nubian languages 0 0
nwc Classical Newari 0 0
nya ny Chichewa 0 0
nym Nyamwezi 0 0
nyn Nyankole 0 0
nyo Nyoro 0 0
nzi Nzima 0 0
oci oc Occitan 1 1
oji oj Ojibwa 0 0
ori or Oriya 0 0
orm om Oromo 0 0
osa Osage 0 0
oss os Ossetian 0 0
ota Turkish, Ottoman (1500-1928) 0 0
oto Otomian languages 0 0
paa Papuan (Other) 0 0
pag Pangasinan 0 0
pal Pahlavi 0 0
pam Pampanga 0 0
pan pa Panjabi 0 0
pap Papiamento 0 0
pau Palauan 0 0
peo Persian, Old (ca.600-400 B.C.) 0 0
per fa Persian 1 1
phi Philippine (Other) 0 0
phn Phoenician 0 0
pli pi Pali 0 0
pol pl Polish 1 1
pon Pohnpeian 0 0
por pt Portuguese 1 1
pra Prakrit languages 0 0
pro Provençal, Old (to 1500) 0 0
pus ps Pushto 0 0
que qu Quechua 0 0
raj Rajasthani 0 0
rap Rapanui 0 0
rar Rarotongan 0 0
roa Romance (Other) 0 0
roh rm Raeto-Romance 0 0
rom Romany 0 0
run rn Rundi 0 0
rup Aromanian 0 0
rus ru Russian 1 1
sad Sandawe 0 0
sag sg Sango 0 0
sah Yakut 0 0
sai South American Indian (Other) 0 0
sal Salishan languages 0 0
sam Samaritan Aramaic 0 0
san sa Sanskrit 0 0
sas Sasak 0 0
sat Santali 0 0
scc sr Serbian 1 1
scn Sicilian 0 0
sco Scots 0 0
sel Selkup 0 0
sem Semitic (Other) 0 0
sga Irish, Old (to 900) 0 0
sgn Sign Languages 0 0
shn Shan 0 0
sid Sidamo 0 0
sin si Sinhalese 1 1
sio Siouan languages 0 0
sit Sino-Tibetan (Other) 0 0
sla Slavic (Other) 0 0
slo sk Slovak 1 1
slv sl Slovenian 1 1
sma Southern Sami 0 0
sme se Northern Sami 0 0
smi Sami languages (Other) 0 0
smj Lule Sami 0 0
smn Inari Sami 0 0
smo sm Samoan 0 0
sms Skolt Sami 0 0
sna sn Shona 0 0
snd sd Sindhi 0 0
snk Soninke 0 0
sog Sogdian 0 0
som so Somali 0 0
son Songhai 0 0
sot st Sotho, Southern 0 0
spa es Spanish 1 1
srd sc Sardinian 0 0
srr Serer 0 0
ssa Nilo-Saharan (Other) 0 0
ssw ss Swati 0 0
suk Sukuma 0 0
sun su Sundanese 0 0
sus Susu 0 0
sux Sumerian 0 0
swa sw Swahili 1 0
swe sv Swedish 1 1
syr Syriac 1 0
tah ty Tahitian 0 0
tai Tai (Other) 0 0
tam ta Tamil 0 0
tat tt Tatar 0 0
tel te Telugu 0 0
tem Timne 0 0
ter Tereno 0 0
tet Tetum 0 0
tgk tg Tajik 0 0
tgl tl Tagalog 1 1
tha th Thai 1 1
tib bo Tibetan 0 0
tig Tigre 0 0
tir ti Tigrinya 0 0
tiv Tiv 0 0
tkl Tokelau 0 0
tlh Klingon 0 0
tli Tlingit 0 0
tmh Tamashek 0 0
tog Tonga (Nyasa) 0 0
ton to Tonga (Tonga Islands) 0 0
tpi Tok Pisin 0 0
tsi Tsimshian 0 0
tsn tn Tswana 0 0
tso ts Tsonga 0 0
tuk tk Turkmen 0 0
tum Tumbuka 0 0
tup Tupi languages 0 0
tur tr Turkish 1 1
tut Altaic (Other) 0 0
tvl Tuvalu 0 0
twi tw Twi 0 0
tyv Tuvinian 0 0
udm Udmurt 0 0
uga Ugaritic 0 0
uig ug Uighur 0 0
ukr uk Ukrainian 1 1
umb Umbundu 0 0
und Undetermined 0 0
urd ur Urdu 1 0
uzb uz Uzbek 0 0
vai Vai 0 0
ven ve Venda 0 0
vie vi Vietnamese 1 1
vol vo Volapük 0 0
vot Votic 0 0
wak Wakashan languages 0 0
wal Walamo 0 0
war Waray 0 0
was Washo 0 0
wel cy Welsh 0 0
wen Sorbian languages 0 0
wln wa Walloon 0 0
wol wo Wolof 0 0
xal Kalmyk 0 0
xho xh Xhosa 0 0
yao Yao 0 0
yap Yapese 0 0
yid yi Yiddish 0 0
yor yo Yoruba 0 0
ypk Yupik languages 0 0
zap Zapotec 0 0
zen Zenaga 0 0
zha za Zhuang 0 0
znd Zande 0 0
zul zu Zulu 0 0
zun Zuni 0 0
rum ro Romanian 1 1
pob pb Brazilian 1 1

View file

@ -1,54 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2014 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.test.guessittest import *
class TestApi(TestGuessit):
    """Cross-checks the typed ``guess_*_info`` helpers against ``guess_file_info``."""

    def test_api(self):
        """``guess_file_info`` with an explicit type must match the dedicated helper."""
        path = 'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv'

        # Reference results, one per file type, computed through the helpers.
        expected = [
            ('movie', guessit.guess_movie_info(path)),
            ('video', guessit.guess_video_info(path)),
            ('episode', guessit.guess_episode_info(path)),
        ]
        file_info = guessit.guess_file_info(path)

        # The type can be given as a keyword argument ...
        for filetype, info in expected:
            self.assertEqual(guessit.guess_file_info(path, type=filetype), info)

        # ... or through the options dict.
        for filetype, info in expected:
            self.assertEqual(guessit.guess_file_info(path, options={'type': filetype}), info)

        # When both are supplied, the expected result is the one for the type
        # given in ``options`` (episode), not the keyword argument (movie).
        episode_info = expected[-1][1]
        self.assertEqual(
            guessit.guess_file_info(path, options={'type': 'episode'}, type='movie'),
            episode_info)

        # Same name without the ".mkv" suffix, guessed with name_only enabled:
        # no 'container' is expected, whereas the full path above yields one.
        bare_path = 'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD'
        bare_info = guessit.guess_file_info(bare_path, options={"name_only": True})

        self.assertFalse('container' in bare_info)
        self.assertTrue('container' in file_info)


suite = allTests(TestApi)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)

View file

@ -1,45 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.test.guessittest import *
class TestAutoDetect(TestGuessit):
    """Guessing without an explicit file type."""

    def testEmpty(self):
        """Names made only of separators produce an empty or near-empty guess."""
        cases = (
            ('', {}),
            ('___-__', {}),
            ('__-.avc', {'type': 'unknown', 'extension': 'avc'}),
        )
        for name, expected in cases:
            self.assertEqual(guessit.guess_file_info(name), expected)

    def testAutoDetect(self):
        """Run the autodetect.yaml fixtures, keeping the guessed type in the check."""
        self.checkMinimumFieldsCorrect(remove_type=False, filename='autodetect.yaml')


suite = allTests(TestAutoDetect)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)

View file

@ -1,46 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.test.guessittest import *
# Fixture entries handed to ``exclude_files`` below; both lists are empty.
IGNORE_EPISODES = []
IGNORE_MOVIES = []


class TestAutoDetectAll(TestGuessit):
    """Runs every fixture file without forcing a file type."""

    def testAutoMatcher(self):
        """Check autodetect.yaml, keeping the guessed type in the comparison."""
        self.checkMinimumFieldsCorrect(remove_type=False, filename='autodetect.yaml')

    def testAutoMatcherMovies(self):
        """Check movies.yaml, minus the entries listed in IGNORE_MOVIES."""
        self.checkMinimumFieldsCorrect(exclude_files=IGNORE_MOVIES, filename='movies.yaml')

    def testAutoMatcherEpisodes(self):
        """Check episodes.yaml, minus the entries listed in IGNORE_EPISODES."""
        self.checkMinimumFieldsCorrect(exclude_files=IGNORE_EPISODES, filename='episodes.yaml')


suite = allTests(TestAutoDetectAll)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)

View file

@ -1,45 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2014 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.test.guessittest import *
import guessit
import guessit.hash_ed2k
import unittest
import doctest
def load_tests(loader, tests, ignore):
    """Add the doctest suites of the guessit modules to *tests* and return it.

    The signature follows the unittest ``load_tests`` hook; *loader* and
    *ignore* are accepted but not used.
    """
    doctested_modules = (
        guessit,
        guessit.date,
        guessit.fileutils,
        guessit.guess,
        guessit.hash_ed2k,
        guessit.language,
        guessit.matchtree,
        guessit.textutils,
    )
    for module in doctested_modules:
        tests.addTests(doctest.DocTestSuite(module))
    return tests


# Build the suite eagerly so it is available as a module attribute.
suite = unittest.TestSuite()
load_tests(None, suite, None)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)

View file

@ -1,35 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.test.guessittest import *
class TestEpisode(TestGuessit):
    """Runs the episode fixtures with the file type set to ``episode``."""

    def testEpisodes(self):
        """Check every entry of episodes.yaml as an episode."""
        self.checkMinimumFieldsCorrect(filename='episodes.yaml', filetype='episode')


suite = allTests(TestEpisode)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)

View file

@ -1,46 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.test.guessittest import *
class TestHashes(TestGuessit):
    """Checks the hash properties computed for fixture files next to this module."""

    def test_hashes(self):
        """Each (hash type, fixture file) pair must yield the recorded value."""
        # (property requested, fixture file name, expected property value)
        hashes = (
            ('hash_mpc', '1MB', u'8542ad406c15c8bd'),  # TODO: Check if this value is valid
            ('hash_ed2k', '1MB', u'ed2k://|file|1MB|1048576|AA3CC5552A9931A76B61A41D306735F7|/'),  # TODO: Check if this value is valid
            ('hash_md5', '1MB', u'5d8dcbca8d8ac21766f28797d6c3954c'),
            ('hash_sha1', '1MB', u'51d2b8f3248d7ee495b7750c8da5aa3b3819de9d'),
            ('hash_md5', 'dummy.srt', u'64de6b5893cac24456c46a935ef9c359'),
            ('hash_sha1', 'dummy.srt', u'a703fc0fa4518080505809bf562c6fc6f7b3c98c')
        )

        for hash_type, filename, expected_value in hashes:
            guess = guess_file_info(file_in_same_dir(__file__, filename), hash_type)
            # Look the value up once so the assertion and its failure message
            # are guaranteed to talk about the same object.
            computed_value = guess.get(hash_type)
            self.assertEqual(expected_value, computed_value,
                             "Invalid %s for %s: %s != %s" % (hash_type, filename, computed_value, expected_value))


suite = allTests(TestHashes)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)

View file

@ -1,138 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.test.guessittest import *
import io
class TestLanguage(TestGuessit):
    """Checks that language names and codes from various sources are recognised."""

    def check_languages(self, languages):
        """Assert that every key/value pair of *languages* builds equal Languages.

        :param languages: mapping whose keys and values are both passed to
            ``Language(...)``; each pair must compare equal.
        """
        for lang1, lang2 in languages.items():
            self.assertEqual(Language(lang1),
                             Language(lang2))

    def test_addic7ed(self):
        languages = {'English': 'en',
                     'English (US)': 'en',
                     'English (UK)': 'en',
                     'Italian': 'it',
                     'Portuguese': 'pt',
                     'Portuguese (Brazilian)': 'pt',
                     'Romanian': 'ro',
                     'Español (Latinoamérica)': 'es',
                     'Español (España)': 'es',
                     'Spanish (Latin America)': 'es',
                     'Español': 'es',
                     'Spanish': 'es',
                     'Spanish (Spain)': 'es',
                     'French': 'fr',
                     'Greek': 'el',
                     'Arabic': 'ar',
                     'German': 'de',
                     'Croatian': 'hr',
                     'Indonesian': 'id',
                     'Hebrew': 'he',
                     'Russian': 'ru',
                     'Turkish': 'tr',
                     'Swedish': 'se',
                     'Czech': 'cs',
                     'Dutch': 'nl',
                     'Hungarian': 'hu',
                     'Norwegian': 'no',
                     'Polish': 'pl',
                     'Persian': 'fa'}

        self.check_languages(languages)

    def test_subswiki(self):
        languages = {'English (US)': 'en', 'English (UK)': 'en', 'English': 'en',
                     'French': 'fr', 'Brazilian': 'po', 'Portuguese': 'pt',
                     'Español (Latinoamérica)': 'es', 'Español (España)': 'es',
                     'Español': 'es', 'Italian': 'it', 'Català': 'ca'}

        self.check_languages(languages)

    def test_tvsubtitles(self):
        languages = {'English': 'en', 'Español': 'es', 'French': 'fr', 'German': 'de',
                     'Brazilian': 'br', 'Russian': 'ru', 'Ukrainian': 'ua', 'Italian': 'it',
                     'Greek': 'gr', 'Arabic': 'ar', 'Hungarian': 'hu', 'Polish': 'pl',
                     'Turkish': 'tr', 'Dutch': 'nl', 'Portuguese': 'pt', 'Swedish': 'sv',
                     'Danish': 'da', 'Finnish': 'fi', 'Korean': 'ko', 'Chinese': 'cn',
                     'Japanese': 'jp', 'Bulgarian': 'bg', 'Czech': 'cz', 'Romanian': 'ro'}

        self.check_languages(languages)

    def test_opensubtitles(self):
        """Round-trip the codes listed in the bundled OpenSubtitles language table."""
        opensubtitles_langfile = file_in_same_dir(__file__, 'opensubtitles_languages_2012_05_09.txt')

        # Read the whole table inside a ``with`` block so the file handle is
        # closed even when an assertion below fails. [1:] drops the header row.
        with io.open(opensubtitles_langfile, encoding='utf-8') as langfile:
            rows = [u(line).strip() for line in langfile][1:]

        for row in rows:
            idlang, alpha2, _, upload_enabled, web_enabled = row.split('\t')

            # do not test languages that are too esoteric / not widely available
            if int(upload_enabled) and int(web_enabled):
                # check that we recognize the opensubtitles language code correctly
                # and that we are able to output this code from a language
                self.assertEqual(idlang, Language(idlang).opensubtitles)
                if alpha2:
                    # check we recognize the opensubtitles 2-letter code correctly
                    self.check_languages({idlang: alpha2})

    def test_tmdb(self):
        # examples from http://api.themoviedb.org/2.1/language-tags
        for lang in ['en-US', 'en-CA', 'es-MX', 'fr-PF']:
            self.assertEqual(lang, Language(lang).tmdb)

    def test_subtitulos(self):
        languages = {'English (US)': 'en', 'English (UK)': 'en', 'English': 'en',
                     'French': 'fr', 'Brazilian': 'po', 'Portuguese': 'pt',
                     'Español (Latinoamérica)': 'es', 'Español (España)': 'es',
                     'Español': 'es', 'Italian': 'it', 'Català': 'ca'}

        self.check_languages(languages)

    def test_thesubdb(self):
        languages = {'af': 'af', 'cs': 'cs', 'da': 'da', 'de': 'de', 'en': 'en', 'es': 'es', 'fi': 'fi',
                     'fr': 'fr', 'hu': 'hu', 'id': 'id', 'it': 'it', 'la': 'la', 'nl': 'nl', 'no': 'no',
                     'oc': 'oc', 'pl': 'pl', 'pt': 'pt', 'ro': 'ro', 'ru': 'ru', 'sl': 'sl', 'sr': 'sr',
                     'sv': 'sv', 'tr': 'tr'}

        self.check_languages(languages)

    def test_language_object(self):
        """Language('qwerty') and Language('asdf') must be interchangeable as set members and dict keys."""
        self.assertEqual(len(list(set([Language('qwerty'), Language('asdf')]))), 1)
        d = {Language('qwerty'): 7}
        d[Language('asdf')] = 23
        self.assertEqual(d[Language('qwerty')], 23)

    def test_exceptions(self):
        self.assertEqual(Language('br'), Language('pt(br)'))

        # languages should be equal regardless of country
        self.assertEqual(Language('br'), Language('pt'))

        self.assertEqual(Language('unknown'), Language('und'))


suite = allTests(TestLanguage)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)

View file

@ -1,70 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2014 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.test.guessittest import *
from guessit.fileutils import split_path, file_in_same_dir
from guessit.textutils import strip_brackets, str_replace, str_fill
from guessit import PY2
from guessit import __main__
if PY2:
from StringIO import StringIO
else:
from io import StringIO
class TestMain(TestGuessit):
    """Smoke-tests the command-line entry point with various argument lists."""

    def setUp(self):
        # Swap stdout for an in-memory buffer for the duration of each test;
        # the original stream is restored in tearDown.
        self._stdout = sys.stdout
        sys.stdout = StringIO()

    def tearDown(self):
        sys.stdout = self._stdout

    def test_list_properties(self):
        for argv in (["-p"], ["-l"]):
            __main__.main(argv, False)

    def test_list_transformers(self):
        for argv in (["--transformers"], ["-l", "--transformers"]):
            __main__.main(argv, False)

    def test_demo(self):
        for argv in (["-d"], ["-l"]):
            __main__.main(argv, False)

    def test_filename(self):
        filename_runs = (
            ["A.Movie.2014.avi"],
            ["A.Movie.2014.avi", "A.2nd.Movie.2014.avi"],
            ["-y", "A.Movie.2014.avi"],
            ["-a", "A.Movie.2014.avi"],
            ["-v", "A.Movie.2014.avi"],
            ["-t", "movie", "A.Movie.2014.avi"],
            ["-t", "episode", "A.Serie.S02E06.avi"],
        )
        for argv in filename_runs:
            __main__.main(argv, False)

        # The "-i" runs are pointed at the "1MB" fixture located next to this module.
        for info_type in ("hash_mpc", "hash_md5"):
            __main__.main(["-i", info_type, file_in_same_dir(__file__, "1MB")], False)


suite = allTests(TestMain)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)

View file

@ -1,93 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.test.guessittest import *
from guessit.transfo.guess_release_group import GuessReleaseGroup
from guessit.transfo.guess_properties import GuessProperties
from guessit.matchtree import BaseMatchTree
# Fixture consumed by TestMatchTree.test_match: each complex ("? ...") key is
# an input string, and its value is a mapping of property name -> value.
# The document is a trusted literal made of plain strings only, so safe_load
# is sufficient; it also avoids calling yaml.load without an explicit Loader,
# which newer PyYAML releases deprecate and then reject.
# The continuation lines ("other: ...") must stay indented under their
# "videoCodec" sibling, otherwise they parse as top-level keys.
keywords = yaml.safe_load("""
? Xvid PROPER
: videoCodec: Xvid
  other: PROPER
? PROPER-Xvid
: videoCodec: Xvid
  other: PROPER
""")
def guess_info(string, options=None):
    """Run the release-group and properties transformers over *string*.

    :param string: text used to build the ``MatchTree``.
    :param options: forwarded unchanged to each transformer's ``process``.
    :return: the result of ``matched()`` on the processed tree.
    """
    tree = MatchTree(string)
    # Order matters: release group first, then properties.
    for transformer_cls in (GuessReleaseGroup, GuessProperties):
        transformer_cls().process(tree, options)
    return tree.matched()
class TestMatchTree(TestGuessit):
    """Checks tree partitioning/navigation and keyword matching."""

    def test_base_tree(self):
        """Partition a string twice and walk the resulting leaves."""
        tree = BaseMatchTree('One Two Three(Three) Four')
        tree.partition((3, 7, 20))

        # First level: four leaves cut at offsets 3, 7 and 20.
        top = tree.leaves()
        self.assertEqual(top[0].span, (0, 3))
        for index, expected in enumerate(['One', ' Two', ' Three(Three)', ' Four']):
            self.assertEqual(expected, top[index].value)

        # Second level: split the third leaf further.
        third = top[2]
        third.partition((1, 6, 7, 12))
        three_leaves = third.leaves()
        for index in (1, 3):
            self.assertEqual('Three', three_leaves[index].value)

        # The whole tree now exposes eight leaves.
        leaves = tree.leaves()
        self.assertEqual(len(leaves), 8)

        first, last = leaves[0], leaves[7]
        pivot = leaves[5]
        self.assertEqual(pivot, three_leaves[3])

        # Navigation from a leaf in the middle of the tree.
        self.assertEqual(tree.previous_leaf(pivot), leaves[4])
        self.assertEqual(tree.next_leaf(pivot), leaves[6])
        self.assertEqual(tree.next_leaves(pivot), [leaves[6], last])
        self.assertEqual(tree.previous_leaves(pivot),
                         [leaves[i] for i in (4, 3, 2, 1, 0)])

        # Navigation past either end of the tree.
        self.assertEqual(tree.next_leaf(last), None)
        self.assertEqual(tree.previous_leaf(first), None)
        self.assertEqual(tree.next_leaves(last), [])
        self.assertEqual(tree.previous_leaves(first), [])

    def test_match(self):
        """Check the module-level ``keywords`` fixture through ``guess_info``."""
        self.checkFields(keywords, guess_info)


suite = allTests(TestMatchTree)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)

View file

@ -1,35 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.test.guessittest import *
class TestMovie(TestGuessit):
    """Runs the movie fixtures with the file type set to ``movie``."""

    def testMovies(self):
        """Check every entry of movies.yaml as a movie."""
        self.checkMinimumFieldsCorrect(filename='movies.yaml', filetype='movie')


suite = allTests(TestMovie)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)

View file

@ -1,126 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.quality import best_quality, best_quality_properties
from guessit.containers import QualitiesContainer
from guessit.test.guessittest import *
class TestQuality(TestGuessit):
def test_container(self):
container = QualitiesContainer()
container.register_quality('color', 'red', 10)
container.register_quality('color', 'orange', 20)
container.register_quality('color', 'green', 30)
container.register_quality('context', 'sun', 100)
container.register_quality('context', 'sea', 200)
container.register_quality('context', 'sex', 300)
g1 = Guess()
g1['color'] = 'red'
g2 = Guess()
g2['color'] = 'green'
g3 = Guess()
g3['color'] = 'orange'
q3 = container.rate_quality(g3)
self.assertEqual(q3, 20, "ORANGE should be rated 20. Don't ask why!")
q1 = container.rate_quality(g1)
q2 = container.rate_quality(g2)
self.assertTrue(q2 > q1, "GREEN should be greater than RED. Don't ask why!")
g1['context'] = 'sex'
g2['context'] = 'sun'
q1 = container.rate_quality(g1)
q2 = container.rate_quality(g2)
self.assertTrue(q1 > q2, "SEX should be greater than SUN. Don't ask why!")
self.assertEqual(container.best_quality(g1, g2), g1, "RED&SEX should be better than GREEN&SUN. Don't ask why!")
self.assertEqual(container.best_quality_properties(['color'], g1, g2), g2, "GREEN should be better than RED. Don't ask why!")
self.assertEqual(container.best_quality_properties(['context'], g1, g2), g1, "SEX should be better than SUN. Don't ask why!")
q1 = container.rate_quality(g1, 'color')
q2 = container.rate_quality(g2, 'color')
self.assertTrue(q2 > q1, "GREEN should be greater than RED. Don't ask why!")
container.unregister_quality('context', 'sex')
container.unregister_quality('context', 'sun')
q1 = container.rate_quality(g1)
q2 = container.rate_quality(g2)
self.assertTrue(q2 > q1, "GREEN&SUN should be greater than RED&SEX. Don't ask why!")
g3['context'] = 'sea'
container.unregister_quality('context', 'sea')
q3 = container.rate_quality(g3, 'context')
self.assertEqual(q3, 0, "Context should be unregistered.")
container.unregister_quality('color')
q3 = container.rate_quality(g3, 'color')
self.assertEqual(q3, 0, "Color should be unregistered.")
container.clear_qualities()
q1 = container.rate_quality(g1)
q2 = container.rate_quality(g2)
self.assertTrue(q1 == q2 == 0, "Empty quality container should rate each guess to 0")
def test_quality_transformers(self):
    # Two guesses for the same movie: one is 720p with DTS audio, the other
    # 1080p with MP3 audio. They are compared globally and per property.
    low_res = guessit.guess_file_info("2012.2009.720p.BluRay.x264.DTS WiKi.mkv")
    high_res = guessit.guess_file_info("2012.2009.1080p.BluRay.x264.MP3 WiKi.mkv")

    # Both properties must have been detected in both guesses, otherwise the
    # quality comparisons below would be meaningless.
    for prop in ('audioCodec', 'screenSize'):
        for guess in (low_res, high_res):
            self.assertTrue(prop in guess, "%s should be present" % prop)

    overall_winner = best_quality(low_res, high_res)
    self.assertTrue(high_res == overall_winner, "1080p+MP3 is not the best global quality")

    size_winner = best_quality_properties(['screenSize'], low_res, high_res)
    self.assertTrue(high_res == size_winner, "1080p is not the best screenSize")

    audio_winner = best_quality_properties(['audioCodec'], low_res, high_res)
    self.assertTrue(low_res == audio_winner, "DTS is not the best audioCodec")
# Module-level suite built from TestQuality; only run when executed as a script.
suite = allTests(TestQuality)

if __name__ == '__main__':
    runner = TextTestRunner(verbosity=2)
    runner.run(suite)

View file

@ -1,155 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.test.guessittest import *
from guessit.fileutils import split_path
from guessit.textutils import strip_brackets, str_replace, str_fill, from_camel, is_camel,\
levenshtein, reorder_title
from guessit import PY2
from guessit.date import search_date, search_year
from datetime import datetime, date, timedelta
class TestUtils(TestGuessit):
    """Tests for the path, text and date helpers (split_path, textutils, date)."""

    def test_splitpath(self):
        # Expected components are keyed by "are we running on win32?" because
        # split_path() relies on the platform's path conventions.
        alltests = {False: {'/usr/bin/smewt': ['/', 'usr', 'bin', 'smewt'],
                            'relative_path/to/my_folder/': ['relative_path', 'to', 'my_folder'],
                            '//some/path': ['//', 'some', 'path'],
                            '//some//path': ['//', 'some', 'path'],
                            '///some////path': ['///', 'some', 'path']
                            },
                    True: {'C:\\Program Files\\Smewt\\smewt.exe': ['C:\\', 'Program Files', 'Smewt', 'smewt.exe'],
                           'Documents and Settings\\User\\config': ['Documents and Settings', 'User', 'config'],
                           'C:\\Documents and Settings\\User\\config': ['C:\\', 'Documents and Settings', 'User', 'config'],
                           # http://bugs.python.org/issue19945
                           '\\\\netdrive\\share': ['\\\\', 'netdrive', 'share'] if PY2 else ['\\\\netdrive\\share'],
                           '\\\\netdrive\\share\\folder': ['\\\\', 'netdrive', 'share', 'folder'] if PY2 else ['\\\\netdrive\\share\\', 'folder'],
                           }
                    }
        tests = alltests[sys.platform == 'win32']
        for path, split in tests.items():
            self.assertEqual(split, split_path(path))

    def test_strip_brackets(self):
        # (input, expected) pairs; mismatched brackets must be left untouched
        allTests = (('', ''),
                    ('[test]', 'test'),
                    ('{test2}', 'test2'),
                    ('(test3)', 'test3'),
                    ('(test4]', '(test4]'),
                    )

        for i, e in allTests:
            self.assertEqual(e, strip_brackets(i))

    def test_levenshtein(self):
        self.assertEqual(levenshtein("abcdef ghijk lmno", "abcdef ghijk lmno"), 0)
        self.assertEqual(levenshtein("abcdef ghijk lmnop", "abcdef ghijk lmno"), 1)
        self.assertEqual(levenshtein("abcdef ghijk lmno", "abcdef ghijk lmn"), 1)
        self.assertEqual(levenshtein("abcdef ghijk lmno", "abcdef ghijk lmnp"), 1)
        self.assertEqual(levenshtein("abcdef ghijk lmno", "abcdef ghijk lmnq"), 1)
        self.assertEqual(levenshtein("cbcdef ghijk lmno", "abcdef ghijk lmnq"), 2)
        self.assertEqual(levenshtein("cbcdef ghihk lmno", "abcdef ghijk lmnq"), 3)

    def test_reorder_title(self):
        self.assertEqual(reorder_title("Simpsons, The"), "The Simpsons")
        self.assertEqual(reorder_title("Simpsons,The"), "The Simpsons")
        self.assertEqual(reorder_title("Simpsons,Les", articles=('the', 'le', 'la', 'les')), "Les Simpsons")
        self.assertEqual(reorder_title("Simpsons, Les", articles=('the', 'le', 'la', 'les')), "Les Simpsons")

    def test_camel(self):
        self.assertEqual("", from_camel(""))

        self.assertEqual("Hello world", str_replace("Hello World", 6, 'w'))
        self.assertEqual("Hello *****", str_fill("Hello World", (6, 11), '*'))

        # This used to be assertTrue(expected, actual), which treats `actual`
        # as the failure message and therefore could never fail.
        self.assertEqual("This is camel", from_camel("ThisIsCamel"))

        self.assertEqual('camel case', from_camel('camelCase'))
        self.assertEqual('A case', from_camel('ACase'))
        self.assertEqual('MiXedCaSe is not camel case', from_camel('MiXedCaSe is not camelCase'))

        self.assertEqual("This is camel cased title", from_camel("ThisIsCamelCasedTitle"))
        self.assertEqual("This is camel CASED title", from_camel("ThisIsCamelCASEDTitle"))

        self.assertEqual("These are camel CASED title", from_camel("TheseAreCamelCASEDTitle"))

        self.assertEqual("Give a camel case string", from_camel("GiveACamelCaseString"))

        self.assertEqual("Death TO camel case", from_camel("DeathTOCamelCase"))
        self.assertEqual("But i like java too:)", from_camel("ButILikeJavaToo:)"))

        self.assertEqual("Beatdown french DVD rip.mkv", from_camel("BeatdownFrenchDVDRip.mkv"))
        self.assertEqual("DO NOTHING ON UPPER CASE", from_camel("DO NOTHING ON UPPER CASE"))

        self.assertFalse(is_camel("this_is_not_camel"))
        self.assertTrue(is_camel("ThisIsCamel"))

        self.assertEqual("Dark.City.(1998).DC.BDRIP.720p.DTS.X264-CHD.mkv", from_camel("Dark.City.(1998).DC.BDRIP.720p.DTS.X264-CHD.mkv"))
        self.assertFalse(is_camel("Dark.City.(1998).DC.BDRIP.720p.DTS.X264-CHD.mkv"))

        self.assertEqual("A2LiNE", from_camel("A2LiNE"))

    def test_date(self):
        self.assertEqual(search_year(' in the year 2000... '), (2000, (13, 17)))
        self.assertEqual(search_year(' they arrived in 1492. '), (None, None))

        today = date.today()
        today_year_2 = int(str(today.year)[2:])

        future = today + timedelta(days=1000)
        future_year_2 = int(str(future.year)[2:])

        past = today - timedelta(days=10000)
        past_year_2 = int(str(past.year)[2:])

        self.assertEqual(search_date(' Something before 2002-04-22 '), (date(2002, 4, 22), (18, 28)))
        self.assertEqual(search_date(' 2002-04-22 Something after '), (date(2002, 4, 22), (1, 11)))

        self.assertEqual(search_date(' This happened on 2002-04-22. '), (date(2002, 4, 22), (18, 28)))
        self.assertEqual(search_date(' This happened on 22-04-2002. '), (date(2002, 4, 22), (18, 28)))

        # The two-digit years are ints, so they are rendered with %02d: with
        # %s a year such as 2005 would become "5" instead of "05", making the
        # date string one character short of the expected (18, 26) span.
        self.assertEqual(search_date(' This happened on 13-04-%02d. ' % (today_year_2,)), (date(today.year, 4, 13), (18, 26)))
        self.assertEqual(search_date(' This happened on 22-04-%02d. ' % (future_year_2,)), (date(future.year, 4, 22), (18, 26)))
        self.assertEqual(search_date(' This happened on 20-04-%02d. ' % (past_year_2,)), (date(past.year, 4, 20), (18, 26)))

        self.assertEqual(search_date(' This happened on 04-13-%02d. ' % (today_year_2,)), (date(today.year, 4, 13), (18, 26)))
        self.assertEqual(search_date(' This happened on 04-22-%02d. ' % (future_year_2,)), (date(future.year, 4, 22), (18, 26)))
        self.assertEqual(search_date(' This happened on 04-20-%02d. ' % (past_year_2,)), (date(past.year, 4, 20), (18, 26)))

        # day/month combinations that cannot form a valid date
        self.assertEqual(search_date(' This happened on 35-12-%02d. ' % (today_year_2,)), (None, None))
        self.assertEqual(search_date(' This happened on 37-18-%02d. ' % (future_year_2,)), (None, None))
        self.assertEqual(search_date(' This happened on 44-42-%02d. ' % (past_year_2,)), (None, None))

        # str(date) is the 10-character ISO form, hence the (18, 28) span
        self.assertEqual(search_date(' This happened on %s. ' % (today, )), (today, (18, 28)))
        self.assertEqual(search_date(' This happened on %s. ' % (future, )), (future, (18, 28)))
        self.assertEqual(search_date(' This happened on %s. ' % (past, )), (past, (18, 28)))

        self.assertEqual(search_date(' released date: 04-03-1901? '), (None, None))

        self.assertEqual(search_date(' There\'s no date in here. '), (None, None))
# Module-level suite built from TestUtils; only run when executed as a script.
suite = allTests(TestUtils)

if __name__ == '__main__':
    runner = TextTestRunner(verbosity=2)
    runner.run(suite)

View file

@ -1,25 +1,24 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GuessIt - A library for guessing information from filenames # Smewt - A smart collection manager
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com> # Copyright (c) 2008-2012 Nicolas Wack <wackou@gmail.com>
# #
# GuessIt is free software; you can redistribute it and/or modify it under # Smewt is free software; you can redistribute it and/or modify
# the terms of the Lesser GNU General Public License as published by # it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or # the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version. # (at your option) any later version.
# #
# GuessIt is distributed in the hope that it will be useful, # Smewt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of # but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details. # GNU General Public License for more details.
# #
# You should have received a copy of the Lesser GNU General Public License # You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import unicode_literals
from guessit import s from guessit import s
from guessit.patterns import sep from guessit.patterns import sep
import functools import functools
@ -28,7 +27,6 @@ import re
# string-related functions # string-related functions
def normalize_unicode(s): def normalize_unicode(s):
return unicodedata.normalize('NFC', s) return unicodedata.normalize('NFC', s)
@ -45,36 +43,19 @@ def strip_brackets(s):
return s return s
_dotted_rexp = re.compile(r'(?:\W|^)(([A-Za-z]\.){2,}[A-Za-z]\.?)')
def clean_string(st): def clean_string(st):
for c in sep: for c in sep:
# do not remove certain chars # do not remove certain chars
if c in ['-', ',']: if c in ['-', ',']:
continue continue
if c == '.':
# we should not remove the dots for acronyms and such
dotted = _dotted_rexp.search(st)
if dotted:
s = dotted.group(1)
exclude_begin, exclude_end = dotted.span(1)
st = (st[:exclude_begin].replace(c, ' ') +
st[exclude_begin:exclude_end] +
st[exclude_end:].replace(c, ' '))
continue
st = st.replace(c, ' ') st = st.replace(c, ' ')
parts = st.split() parts = st.split()
result = ' '.join(p for p in parts if p != '') result = ' '.join(p for p in parts if p != '')
# now also remove dashes on the outer part of the string # now also remove dashes on the outer part of the string
while result and result[0] in '-': while result and result[0] in sep:
result = result[1:] result = result[1:]
while result and result[-1] in '-': while result and result[-1] in sep:
result = result[:-1] result = result[:-1]
return result return result
@ -82,23 +63,21 @@ def clean_string(st):
_words_rexp = re.compile('\w+', re.UNICODE) _words_rexp = re.compile('\w+', re.UNICODE)
def find_words(s): def find_words(s):
return _words_rexp.findall(s.replace('_', ' ')) return _words_rexp.findall(s.replace('_', ' '))
def reorder_title(title, articles=('the',), separators=(',', ', ')): def reorder_title(title):
ltitle = title.lower() ltitle = title.lower()
for article in articles: if ltitle[-4:] == ',the':
for separator in separators: return title[-3:] + ' ' + title[:-4]
suffix = separator + article if ltitle[-5:] == ', the':
if ltitle[-len(suffix):] == suffix: return title[-3:] + ' ' + title[:-5]
return title[-len(suffix) + len(separator):] + ' ' + title[:-len(suffix)]
return title return title
def str_replace(string, pos, c): def str_replace(string, pos, c):
return string[:pos] + c + string[pos + 1:] return string[:pos] + c + string[pos+1:]
def str_fill(string, region, c): def str_fill(string, region, c):
@ -106,6 +85,7 @@ def str_fill(string, region, c):
return string[:start] + c * (end - start) + string[end:] return string[:start] + c * (end - start) + string[end:]
def levenshtein(a, b): def levenshtein(a, b):
if not a: if not a:
return len(b) return len(b)
@ -115,25 +95,25 @@ def levenshtein(a, b):
m = len(a) m = len(a)
n = len(b) n = len(b)
d = [] d = []
for i in range(m + 1): for i in range(m+1):
d.append([0] * (n + 1)) d.append([0] * (n+1))
for i in range(m + 1): for i in range(m+1):
d[i][0] = i d[i][0] = i
for j in range(n + 1): for j in range(n+1):
d[0][j] = j d[0][j] = j
for i in range(1, m + 1): for i in range(1, m+1):
for j in range(1, n + 1): for j in range(1, n+1):
if a[i - 1] == b[j - 1]: if a[i-1] == b[j-1]:
cost = 0 cost = 0
else: else:
cost = 1 cost = 1
d[i][j] = min(d[i - 1][j] + 1, # deletion d[i][j] = min(d[i-1][j] + 1, # deletion
d[i][j - 1] + 1, # insertion d[i][j-1] + 1, # insertion
d[i - 1][j - 1] + cost # substitution d[i-1][j-1] + cost # substitution
) )
return d[m][n] return d[m][n]
@ -160,7 +140,7 @@ def find_first_level_groups_span(string, enclosing):
[(2, 5), (7, 10)] [(2, 5), (7, 10)]
""" """
opening, closing = enclosing opening, closing = enclosing
depth = [] # depth is a stack of indices where we opened a group depth = [] # depth is a stack of indices where we opened a group
result = [] result = []
for i, c, in enumerate(string): for i, c, in enumerate(string):
if c == opening: if c == opening:
@ -171,7 +151,7 @@ def find_first_level_groups_span(string, enclosing):
end = i end = i
if not depth: if not depth:
# we emptied our stack, so we have a 1st level group # we emptied our stack, so we have a 1st level group
result.append((start, end + 1)) result.append((start, end+1))
except IndexError: except IndexError:
# we closed a group which was not opened before # we closed a group which was not opened before
pass pass
@ -192,7 +172,7 @@ def split_on_groups(string, groups):
""" """
if not groups: if not groups:
return [string] return [ string ]
boundaries = sorted(set(functools.reduce(lambda l, x: l + list(x), groups, []))) boundaries = sorted(set(functools.reduce(lambda l, x: l + list(x), groups, [])))
if boundaries[0] != 0: if boundaries[0] != 0:
@ -200,10 +180,10 @@ def split_on_groups(string, groups):
if boundaries[-1] != len(string): if boundaries[-1] != len(string):
boundaries.append(len(string)) boundaries.append(len(string))
groups = [string[start:end] for start, end in zip(boundaries[:-1], groups = [ string[start:end] for start, end in zip(boundaries[:-1],
boundaries[1:])] boundaries[1:]) ]
return [g for g in groups if g] # return only non-empty groups return [ g for g in groups if g ] # return only non-empty groups
def find_first_level_groups(string, enclosing, blank_sep=None): def find_first_level_groups(string, enclosing, blank_sep=None):
@ -239,114 +219,6 @@ def find_first_level_groups(string, enclosing, blank_sep=None):
if blank_sep: if blank_sep:
for start, end in groups: for start, end in groups:
string = str_replace(string, start, blank_sep) string = str_replace(string, start, blank_sep)
string = str_replace(string, end - 1, blank_sep) string = str_replace(string, end-1, blank_sep)
return split_on_groups(string, groups) return split_on_groups(string, groups)
# Short lowercase words that, when found around a candidate position, mean the
# surrounding text is regular camelCase rather than a MiXedCaSe word.
_camel_word2_set = set(('is', 'to',))
_camel_word3_set = set(('the',))


def _camel_split_and_lower(string, i):
    """Classify the character at index *i* of *string* for camelCase splitting.

    Returns a tuple ``(need_split, need_lower)``:

    - ``need_split`` is True if this char is a first letter in a
      camelCasedString (a space has to be inserted before it);
    - ``need_lower`` is True if this char should be lowercased.
    """
    def is_upper(c):
        return c.isupper() if c else False

    def is_lower(c):
        # "lower" means alphabetic and not uppercase; digits/punctuation are neither
        return (c.isalpha() and not c.isupper()) if c else False

    length = len(string)
    before2 = string[i - 2] if i > 1 else None
    before = string[i - 1] if i > 0 else None
    current = string[i]
    after = string[i + 1] if i + 1 < length else None
    after2 = string[i + 2] if i + 2 < length else None

    cur_up, cur_low = is_upper(current), is_lower(current)
    prev2_up = is_upper(before2)
    prev_up, prev_low = is_upper(before), is_lower(before)
    next_up, next_low = is_upper(after), is_lower(after)
    next2_up = is_upper(after2)

    # upper/lower alternation around this position hints at a MiXedCaSe word
    mixed = ((prev_up and cur_low and next_up) or
             (prev_low and cur_up and next_low and next2_up) or
             (prev2_up and prev_low and cur_up))

    if mixed:
        pair = (current + after).lower() if after else None
        triple = (current + after + after2).lower() if after and after2 else None
        pair_before = (before2 + before).lower() if before2 and before else None
        # a known short word right here (or just before) explains the alternation
        if (pair in _camel_word2_set or pair_before in _camel_word2_set
                or triple in _camel_word3_set):
            mixed = False

    # lower -> upper transition: start of a new word
    need_split = cur_up and prev_low and not mixed

    if need_split:
        all_caps = (prev_up and cur_up and next_up) or (cur_up and next_up and next2_up)
    else:
        # upper -> upper -> lower: last capital of an UPPERCASE run starts a word
        need_split = cur_up and prev_up and next_low
        all_caps = prev_up and not next_low

    need_lower = not all_caps and not mixed and need_split

    return (need_split, need_lower)
def is_camel(string):
    """Return True if at least one position of *string* is a camelCase word start.

    >>> is_camel('dogEATDog')
    True
    >>> is_camel('DeathToCamelCase')
    True
    >>> is_camel('death_to_camel_case')
    False
    >>> is_camel('TheBest')
    True
    >>> is_camel('The Best')
    False
    """
    # only the need_split half of the classification matters here
    return any(_camel_split_and_lower(string, pos)[0]
               for pos in range(len(string)))
def from_camel(string):
    """Split a camelCased string into space-separated words.

    Falsy input (empty string, None) is returned unchanged.

    >>> from_camel('dogEATDog') == 'dog EAT dog'
    True
    >>> from_camel('DeathToCamelCase') == 'Death to camel case'
    True
    >>> from_camel('TheBest') == 'The best'
    True
    >>> from_camel('MiXedCaSe is not camelCase') == 'MiXedCaSe is not camel case'
    True
    """
    if not string:
        return string

    def convert(pos, char):
        # a word start gets a leading space and, unless it belongs to an
        # uppercase/mixed-case word, is lowercased
        need_split, need_lower = _camel_split_and_lower(string, pos)
        prefix = ' ' if need_split else ''
        return prefix + (char.lower() if need_lower else char)

    return ''.join(convert(pos, char) for pos, char in enumerate(string))

View file

@ -1,341 +0,0 @@
# Version 2013112900, Last Updated Fri Nov 29 07:07:01 2013 UTC
AC
AD
AE
AERO
AF
AG
AI
AL
AM
AN
AO
AQ
AR
ARPA
AS
ASIA
AT
AU
AW
AX
AZ
BA
BB
BD
BE
BF
BG
BH
BI
BIKE
BIZ
BJ
BM
BN
BO
BR
BS
BT
BV
BW
BY
BZ
CA
CAMERA
CAT
CC
CD
CF
CG
CH
CI
CK
CL
CLOTHING
CM
CN
CO
COM
CONSTRUCTION
CONTRACTORS
COOP
CR
CU
CV
CW
CX
CY
CZ
DE
DIAMONDS
DIRECTORY
DJ
DK
DM
DO
DZ
EC
EDU
EE
EG
ENTERPRISES
EQUIPMENT
ER
ES
ESTATE
ET
EU
FI
FJ
FK
FM
FO
FR
GA
GALLERY
GB
GD
GE
GF
GG
GH
GI
GL
GM
GN
GOV
GP
GQ
GR
GRAPHICS
GS
GT
GU
GURU
GW
GY
HK
HM
HN
HOLDINGS
HR
HT
HU
ID
IE
IL
IM
IN
INFO
INT
IO
IQ
IR
IS
IT
JE
JM
JO
JOBS
JP
KE
KG
KH
KI
KITCHEN
KM
KN
KP
KR
KW
KY
KZ
LA
LAND
LB
LC
LI
LIGHTING
LK
LR
LS
LT
LU
LV
LY
MA
MC
MD
ME
MG
MH
MIL
MK
ML
MM
MN
MO
MOBI
MP
MQ
MR
MS
MT
MU
MUSEUM
MV
MW
MX
MY
MZ
NA
NAME
NC
NE
NET
NF
NG
NI
NL
NO
NP
NR
NU
NZ
OM
ORG
PA
PE
PF
PG
PH
PHOTOGRAPHY
PK
PL
PLUMBING
PM
PN
POST
PR
PRO
PS
PT
PW
PY
QA
RE
RO
RS
RU
RW
SA
SB
SC
SD
SE
SEXY
SG
SH
SI
SINGLES
SJ
SK
SL
SM
SN
SO
SR
ST
SU
SV
SX
SY
SZ
TATTOO
TC
TD
TECHNOLOGY
TEL
TF
TG
TH
TIPS
TJ
TK
TL
TM
TN
TO
TODAY
TP
TR
TRAVEL
TT
TV
TW
TZ
UA
UG
UK
US
UY
UZ
VA
VC
VE
VENTURES
VG
VI
VN
VOYAGE
VU
WF
WS
XN--3E0B707E
XN--45BRJ9C
XN--80AO21A
XN--80ASEHDB
XN--80ASWG
XN--90A3AC
XN--CLCHC0EA0B2G2A9GCD
XN--FIQS8S
XN--FIQZ9S
XN--FPCRJ9C3D
XN--FZC2C9E2C
XN--GECRJ9C
XN--H2BRJ9C
XN--J1AMH
XN--J6W193G
XN--KPRW13D
XN--KPRY57D
XN--L1ACC
XN--LGBBAT1AD8J
XN--MGB9AWBF
XN--MGBA3A4F16A
XN--MGBAAM7A8H
XN--MGBAYH7GPA
XN--MGBBH1A71E
XN--MGBC0A9AZCG
XN--MGBERP4A5D4AR
XN--MGBX4CD0AB
XN--NGBC5AZD
XN--O3CW4H
XN--OGBPF8FL
XN--P1AI
XN--PGBS0DH
XN--Q9JYB4C
XN--S9BRJ9C
XN--UNUP4Y
XN--WGBH1C
XN--WGBL6A
XN--XKC2AL3HYE2A
XN--XKC2DL3A5EE0H
XN--YFRO4I67O
XN--YGBI2AMMX
XXX
YE
YT
ZA
ZM
ZW

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GuessIt - A library for guessing information from filenames # GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com> # Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# #
# GuessIt is free software; you can redistribute it and/or modify it under # GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by # the terms of the Lesser GNU General Public License as published by
@ -18,13 +18,92 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import unicode_literals
from guessit import base_text_type, Guess
from guessit.patterns import canonical_form
from guessit.textutils import clean_string
import logging
log = logging.getLogger(__name__)
class TransformerException(Exception): def found_property(node, name, confidence):
def __init__(self, transformer, message): node.guess = Guess({name: node.clean_value}, confidence=confidence, raw=node.value)
log.debug('Found with confidence %.2f: %s' % (confidence, node.guess))
# Call the base class constructor with the parameters it needs
Exception.__init__(self, message)
self.transformer = transformer def format_guess(guess):
"""Format all the found values to their natural type.
For instance, a year would be stored as an int value, etc...
Note that this modifies the dictionary given as input.
"""
for prop, value in guess.items():
if prop in ('season', 'episodeNumber', 'year', 'cdNumber',
'cdNumberTotal', 'bonusNumber', 'filmNumber'):
guess[prop] = int(guess[prop])
elif isinstance(value, base_text_type):
if prop in ('edition',):
value = clean_string(value)
guess[prop] = canonical_form(value).replace('\\', '')
return guess
def find_and_split_node(node, strategy, logger):
# Try the matchers of `strategy` on node.value. `strategy` is an iterable of
# (matcher, confidence, args, kwargs) tuples. On a match the node is
# partitioned around the matched span, the child whose span equals the match
# receives the resulting Guess, and the other children are searched
# recursively with the same strategy.
# Debug messages go to `logger`, or to the module-level `log` when it is falsy.
# NOTE(review): block indentation was lost in this rendering; the nesting
# (in particular which block the final `return` belongs to) must be confirmed
# against the real file.
string = ' %s ' % node.value # add sentinels
for matcher, confidence, args, kwargs in strategy:
all_args = [string]
# matchers flagged with a truthy `use_node` attribute also receive the node
if getattr(matcher, 'use_node', False):
all_args.append(node)
# `args` is appended as ONE extra positional argument, it is not unpacked
if args:
all_args.append(args)
if kwargs:
result, span = matcher(*all_args, **kwargs)
else:
result, span = matcher(*all_args)
if result:
# readjust span to compensate for sentinels
span = (span[0] - 1, span[1] - 1)
# a confidence of None means "not imposed by the strategy": take the one
# stored in a Guess result for its first key, or 1.0 for any other result
if isinstance(result, Guess):
if confidence is None:
confidence = result.confidence(list(result.keys())[0])
else:
if confidence is None:
confidence = 1.0
# the +1 offsets undo the adjustment above, since `string` still has the sentinels
guess = format_guess(Guess(result, confidence=confidence, raw=string[span[0] + 1:span[1] + 1]))
msg = 'Found with confidence %.2f: %s' % (confidence, guess)
(logger or log).debug(msg)
node.partition(span)
# child spans are compared after shifting the local span by node.offset
absolute_span = (span[0] + node.offset, span[1] + node.offset)
for child in node.children:
if child.span == absolute_span:
child.guess = guess
else:
find_and_split_node(child, strategy, logger)
return
class SingleNodeGuesser(object):
    """Run one guess function over every unidentified leaf of a match tree.

    The guess function, its confidence, a logger and any extra positional and
    keyword arguments are stored at construction time and handed to
    find_and_split_node() for each leaf.
    """

    def __init__(self, guess_func, confidence, logger, *args, **kwargs):
        self.guess_func, self.confidence, self.logger = guess_func, confidence, logger
        self.args = args
        self.kwargs = kwargs

    def process(self, mtree):
        # strategy is a list of pairs (guesser, confidence)
        # - if the guesser returns a guessit.Guess and confidence is specified,
        #   it will override it, otherwise it will leave the guess confidence
        # - if the guesser returns a simple dict as a guess and confidence is
        #   specified, it will use it, or 1.0 otherwise
        single_strategy = [(self.guess_func, self.confidence, self.args, self.kwargs)]

        for leaf in mtree.unidentified_leaves():
            find_and_split_node(leaf, single_strategy, self.logger)

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GuessIt - A library for guessing information from filenames # GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com> # Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# #
# GuessIt is free software; you can redistribute it and/or modify it under # GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by # the terms of the Lesser GNU General Public License as published by
@ -18,50 +18,44 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import unicode_literals
from guessit.transfo import found_property
import logging
from guessit.plugins.transformers import Transformer log = logging.getLogger(__name__)
from guessit.matcher import found_property
class GuessBonusFeatures(Transformer): def process(mtree):
def __init__(self): def previous_group(g):
Transformer.__init__(self, -150) for leaf in mtree.unidentified_leaves()[::-1]:
if leaf.node_idx < g.node_idx:
return leaf
def supported_properties(self): def next_group(g):
return ['bonusNumber', 'bonusTitle', 'filmNumber', 'filmSeries', 'title', 'series'] for leaf in mtree.unidentified_leaves():
if leaf.node_idx > g.node_idx:
return leaf
def process(self, mtree, options=None): def same_group(g1, g2):
def previous_group(g): return g1.node_idx[:2] == g2.node_idx[:2]
for leaf in mtree.unidentified_leaves()[::-1]:
if leaf.node_idx < g.node_idx:
return leaf
def next_group(g): bonus = [ node for node in mtree.leaves() if 'bonusNumber' in node.guess ]
for leaf in mtree.unidentified_leaves(): if bonus:
if leaf.node_idx > g.node_idx: bonusTitle = next_group(bonus[0])
return leaf if same_group(bonusTitle, bonus[0]):
found_property(bonusTitle, 'bonusTitle', 0.8)
def same_group(g1, g2): filmNumber = [ node for node in mtree.leaves()
return g1.node_idx[:2] == g2.node_idx[:2] if 'filmNumber' in node.guess ]
if filmNumber:
filmSeries = previous_group(filmNumber[0])
found_property(filmSeries, 'filmSeries', 0.9)
bonus = [node for node in mtree.leaves() if 'bonusNumber' in node.guess] title = next_group(filmNumber[0])
if bonus: found_property(title, 'title', 0.9)
bonusTitle = next_group(bonus[0])
if bonusTitle and same_group(bonusTitle, bonus[0]):
found_property(bonusTitle, 'bonusTitle', confidence=0.8)
filmNumber = [node for node in mtree.leaves() season = [ node for node in mtree.leaves() if 'season' in node.guess ]
if 'filmNumber' in node.guess] if season and 'bonusNumber' in mtree.info:
if filmNumber: series = previous_group(season[0])
filmSeries = previous_group(filmNumber[0]) if same_group(series, season[0]):
found_property(filmSeries, 'filmSeries', confidence=0.9) found_property(series, 'series', 0.9)
title = next_group(filmNumber[0])
found_property(title, 'title', confidence=0.9)
season = [node for node in mtree.leaves() if 'season' in node.guess]
if season and 'bonusNumber' in mtree.info:
series = previous_group(season[0])
if same_group(series, season[0]):
found_property(series, 'series', confidence=0.9)

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GuessIt - A library for guessing information from filenames # GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com> # Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# #
# GuessIt is free software; you can redistribute it and/or modify it under # GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by # the terms of the Lesser GNU General Public License as published by
@ -18,52 +18,31 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import unicode_literals
from guessit.plugins.transformers import Transformer
from guessit.country import Country from guessit.country import Country
from guessit import Guess from guessit import Guess
import logging
log = logging.getLogger(__name__)
class GuessCountry(Transformer): # list of common words which could be interpreted as countries, but which
def __init__(self): # are far too common to be able to say they represent a country
Transformer.__init__(self, -170) country_common_words = frozenset([ 'bt', 'bb' ])
# list of common words which could be interpreted as countries, but which
# are far too common to be able to say they represent a country
self.country_common_words = frozenset(['bt', 'bb'])
def supported_properties(self): def process(mtree):
return ['country'] for node in mtree.unidentified_leaves():
if len(node.node_idx) == 2:
c = node.value[1:-1].lower()
if c in country_common_words:
continue
def should_process(self, mtree, options=None): # only keep explicit groups (enclosed in parentheses/brackets)
options = options or {} if node.value[0] + node.value[-1] not in ['()', '[]', '{}']:
return 'nocountry' not in options.keys() continue
def process(self, mtree, options=None): try:
for node in mtree.unidentified_leaves(): country = Country(c, strict=True)
if len(node.node_idx) == 2: except ValueError:
c = node.value[1:-1].lower() continue
if c in self.country_common_words:
continue
# only keep explicit groups (enclosed in parentheses/brackets) node.guess = Guess(country=country, confidence=1.0, raw=c)
if not node.is_explicit():
continue
try:
country = Country(c, strict=True)
except ValueError:
continue
node.guess = Guess(country=country, confidence=1.0, input=node.value, span=node.span)
def post_process(self, mtree, options=None, *args, **kwargs):
# if country is in the guessed properties, make it part of the series name
series_leaves = mtree.leaves_containing('series')
country_leaves = mtree.leaves_containing('country')
if series_leaves and country_leaves:
country_leaf = country_leaves[0]
for serie_leaf in series_leaves:
serie_leaf.guess['series'] += ' (%s)' % country_leaf.guess['country'].alpha2.upper()
#result['series'] += ' (%s)' % result['country'].alpha2.upper()

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GuessIt - A library for guessing information from filenames # GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com> # Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# #
# GuessIt is free software; you can redistribute it and/or modify it under # GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by # the terms of the Lesser GNU General Public License as published by
@ -18,26 +18,21 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import unicode_literals
from guessit.transfo import SingleNodeGuesser
from guessit.plugins.transformers import Transformer
from guessit.matcher import GuessFinder
from guessit.date import search_date from guessit.date import search_date
import logging
log = logging.getLogger(__name__)
class GuessDate(Transformer): def guess_date(string):
def __init__(self): date, span = search_date(string)
Transformer.__init__(self, 50) if date:
return { 'date': date }, span
else:
return None, None
def supported_properties(self):
    """Return the property names declared by this transformer: only 'date'."""
    properties = ['date']
    return properties
def guess_date(self, string, node=None, options=None): def process(mtree):
date, span = search_date(string) SingleNodeGuesser(guess_date, 1.0, log).process(mtree)
if date:
return {'date': date}, span
else:
return None, None
def process(self, mtree, options=None):
    """Run ``self.guess_date`` over the unidentified leaves of ``mtree``.

    The work is delegated to a ``GuessFinder`` built from the guessing
    method, the value 1.0 (presumably the confidence -- confirm against
    ``GuessFinder``), this transformer's logger and ``options``.
    """
    finder = GuessFinder(self.guess_date, 1.0, self.log, options)
    leaves = mtree.unidentified_leaves()
    finder.process_nodes(leaves)

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GuessIt - A library for guessing information from filenames # GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com> # Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# #
# GuessIt is free software; you can redistribute it and/or modify it under # GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by # the terms of the Lesser GNU General Public License as published by
@ -18,145 +18,129 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import unicode_literals
from guessit.transfo import found_property
from guessit.patterns import non_episode_title, unlikely_series
import logging
from guessit.plugins.transformers import Transformer, get_transformer log = logging.getLogger(__name__)
from guessit.textutils import reorder_title
from guessit.matcher import found_property
class GuessEpisodeInfoFromPosition(Transformer): def match_from_epnum_position(mtree, node):
def __init__(self): epnum_idx = node.node_idx
Transformer.__init__(self, -200)
def supported_properties(self): # a few helper functions to be able to filter using high-level semantics
return ['title', 'series'] def before_epnum_in_same_pathgroup():
return [ leaf for leaf in mtree.unidentified_leaves()
if (leaf.node_idx[0] == epnum_idx[0] and
leaf.node_idx[1:] < epnum_idx[1:]) ]
def match_from_epnum_position(self, mtree, node): def after_epnum_in_same_pathgroup():
epnum_idx = node.node_idx return [ leaf for leaf in mtree.unidentified_leaves()
if (leaf.node_idx[0] == epnum_idx[0] and
leaf.node_idx[1:] > epnum_idx[1:]) ]
# a few helper functions to be able to filter using high-level semantics def after_epnum_in_same_explicitgroup():
def before_epnum_in_same_pathgroup(): return [ leaf for leaf in mtree.unidentified_leaves()
return [leaf for leaf in mtree.unidentified_leaves() if (leaf.node_idx[:2] == epnum_idx[:2] and
if (leaf.node_idx[0] == epnum_idx[0] and leaf.node_idx[2:] > epnum_idx[2:]) ]
leaf.node_idx[1:] < epnum_idx[1:])]
def after_epnum_in_same_pathgroup(): # epnumber is the first group and there are only 2 after it in same
return [leaf for leaf in mtree.unidentified_leaves() # path group
if (leaf.node_idx[0] == epnum_idx[0] and # -> series title - episode title
leaf.node_idx[1:] > epnum_idx[1:])] title_candidates = [ n for n in after_epnum_in_same_pathgroup()
if n.clean_value.lower() not in non_episode_title ]
if ('title' not in mtree.info and # no title
before_epnum_in_same_pathgroup() == [] and # no groups before
len(title_candidates) == 2): # only 2 groups after
def after_epnum_in_same_explicitgroup(): found_property(title_candidates[0], 'series', confidence=0.4)
return [leaf for leaf in mtree.unidentified_leaves() found_property(title_candidates[1], 'title', confidence=0.4)
if (leaf.node_idx[:2] == epnum_idx[:2] and return
leaf.node_idx[2:] > epnum_idx[2:])]
# epnumber is the first group and there are only 2 after it in same # if we have at least 1 valid group before the episodeNumber, then it's
# path group # probably the series name
# -> series title - episode title series_candidates = before_epnum_in_same_pathgroup()
title_candidates = self._filter_candidates(after_epnum_in_same_pathgroup()) if len(series_candidates) >= 1:
found_property(series_candidates[0], 'series', confidence=0.7)
if ('title' not in mtree.info and # no title # only 1 group after (in the same path group) and it's probably the
before_epnum_in_same_pathgroup() == [] and # no groups before # episode title
len(title_candidates) == 2): # only 2 groups after title_candidates = [ n for n in after_epnum_in_same_pathgroup()
if n.clean_value.lower() not in non_episode_title ]
found_property(title_candidates[0], 'series', confidence=0.4)
found_property(title_candidates[1], 'title', confidence=0.4)
return
# if we have at least 1 valid group before the episodeNumber, then it's
# probably the series name
series_candidates = before_epnum_in_same_pathgroup()
if len(series_candidates) >= 1:
found_property(series_candidates[0], 'series', confidence=0.7)
# only 1 group after (in the same path group) and it's probably the
# episode title
title_candidates = self._filter_candidates(after_epnum_in_same_pathgroup())
if len(title_candidates) == 1:
found_property(title_candidates[0], 'title', confidence=0.5)
return
else:
# try in the same explicit group, with lower confidence
title_candidates = [ n for n in after_epnum_in_same_explicitgroup()
if n.clean_value.lower() not in non_episode_title
]
if len(title_candidates) == 1: if len(title_candidates) == 1:
found_property(title_candidates[0], 'title', confidence=0.5) found_property(title_candidates[0], 'title', confidence=0.4)
return
elif len(title_candidates) > 1:
found_property(title_candidates[0], 'title', confidence=0.3)
return return
else:
# try in the same explicit group, with lower confidence
title_candidates = self._filter_candidates(after_epnum_in_same_explicitgroup())
if len(title_candidates) == 1:
found_property(title_candidates[0], 'title', confidence=0.4)
return
elif len(title_candidates) > 1:
found_property(title_candidates[0], 'title', confidence=0.3)
return
# get the one with the longest value # get the one with the longest value
title_candidates = self._filter_candidates(after_epnum_in_same_pathgroup()) title_candidates = [ n for n in after_epnum_in_same_pathgroup()
if title_candidates: if n.clean_value.lower() not in non_episode_title ]
maxidx = -1 if title_candidates:
maxv = -1 maxidx = -1
for i, c in enumerate(title_candidates): maxv = -1
if len(c.clean_value) > maxv: for i, c in enumerate(title_candidates):
maxidx = i if len(c.clean_value) > maxv:
maxv = len(c.clean_value) maxidx = i
found_property(title_candidates[maxidx], 'title', confidence=0.3) maxv = len(c.clean_value)
found_property(title_candidates[maxidx], 'title', confidence=0.3)
def should_process(self, mtree, options=None):
    """Tell whether this transformer applies to ``mtree``.

    Returns False when ``options`` has a truthy 'skip_title' entry;
    otherwise returns whether the guessed 'type' of the tree starts
    with 'episode' (a missing type counts as the empty string).
    """
    if options and options.get('skip_title'):
        return False
    guessed_type = mtree.guess.get('type', '')
    return guessed_type.startswith('episode')
def _filter_candidates(self, candidates): def process(mtree):
episode_special_transformer = get_transformer('guess_episode_special') eps = [node for node in mtree.leaves() if 'episodeNumber' in node.guess]
if episode_special_transformer: if eps:
return [n for n in candidates if not episode_special_transformer.container.find_properties(n.value, n, re_match=True)] match_from_epnum_position(mtree, eps[0])
else:
return candidates
def process(self, mtree, options=None): else:
""" # if we don't have the episode number, but at least 2 groups in the
try to identify the remaining unknown groups by looking at their # basename, then it's probably series - eptitle
position relative to other known elements basename = mtree.node_at((-2,))
""" title_candidates = [ n for n in basename.unidentified_leaves()
eps = [node for node in mtree.leaves() if 'episodeNumber' in node.guess] if n.clean_value.lower() not in non_episode_title
if eps: ]
self.match_from_epnum_position(mtree, eps[0])
else: if len(title_candidates) >= 2:
# if we don't have the episode number, but at least 2 groups in the found_property(title_candidates[0], 'series', 0.4)
# basename, then it's probably series - eptitle found_property(title_candidates[1], 'title', 0.4)
basename = mtree.node_at((-2,)) elif len(title_candidates) == 1:
# but if there's only one candidate, it's probably the series name
found_property(title_candidates[0], 'series', 0.4)
title_candidates = self._filter_candidates(basename.unidentified_leaves()) # if we only have 1 remaining valid group in the folder containing the
# file, then it's likely that it is the series name
try:
series_candidates = mtree.node_at((-3,)).unidentified_leaves()
except ValueError:
series_candidates = []
if len(title_candidates) >= 2: if len(series_candidates) == 1:
found_property(title_candidates[0], 'series', confidence=0.4) found_property(series_candidates[0], 'series', 0.3)
found_property(title_candidates[1], 'title', confidence=0.4)
elif len(title_candidates) == 1:
# but if there's only one candidate, it's probably the series name
found_property(title_candidates[0], 'series', confidence=0.4)
# if we only have 1 remaining valid group in the folder containing the # if there's a path group that only contains the season info, then the
# file, then it's likely that it is the series name # previous one is most likely the series title (ie: ../series/season X/..)
try: eps = [ node for node in mtree.nodes()
series_candidates = mtree.node_at((-3,)).unidentified_leaves() if 'season' in node.guess and 'episodeNumber' not in node.guess ]
except ValueError:
series_candidates = []
if len(series_candidates) == 1: if eps:
found_property(series_candidates[0], 'series', confidence=0.3) previous = [ node for node in mtree.unidentified_leaves()
if node.node_idx[0] == eps[0].node_idx[0] - 1 ]
if len(previous) == 1:
found_property(previous[0], 'series', 0.5)
# if there's a path group that only contains the season info, then the # reduce the confidence of unlikely series
# previous one is most likely the series title (ie: ../series/season X/..) for node in mtree.nodes():
eps = [node for node in mtree.nodes() if 'series' in node.guess:
if 'season' in node.guess and 'episodeNumber' not in node.guess] if node.guess['series'].lower() in unlikely_series:
new_confidence = node.guess.confidence('series') * 0.5
if eps: node.guess.set_confidence('series', new_confidence)
previous = [node for node in mtree.unidentified_leaves()
if node.node_idx[0] == eps[0].node_idx[0] - 1]
if len(previous) == 1:
found_property(previous[0], 'series', confidence=0.5)
def post_process(self, mtree, options=None):
    """Pass every guessed 'series' value of the tree through ``reorder_title``.

    Nodes without a 'series' entry in their guess are left alone.
    ``options`` is not used.
    """
    for node in mtree.nodes():
        if 'series' in node.guess:
            node.guess['series'] = reorder_title(node.guess['series'])

View file

@ -1,62 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.plugins.transformers import Transformer
from guessit.matcher import found_guess
from guessit.containers import PropertiesContainer
class GuessEpisodeSpecial(Transformer):
    """Transformer that tags 'special' markers (Special, Bonus, Pilot, ...)."""

    def __init__(self):
        # -205 is handed to the base class; presumably the transformer
        # priority -- confirm against Transformer.
        Transformer.__init__(self, -205)
        self.container = PropertiesContainer()
        self.container.register_property('special', 'Special', 'Bonus', 'Omake', 'Ova', 'Oav', 'Pilot', 'Unaired')
        self.container.register_property('special', 'Extras?', canonical_form='Extras')

    def guess_special(self, string, node=None, options=None):
        """Return the guesses built from all 'special' properties found in ``string``."""
        found = self.container.find_properties(string, node, 'special', multiple=True)
        return self.container.as_guess(found, multiple=True)

    def second_pass_options(self, mtree, options=None):
        """Return ``{'type': 'episode'}`` when a non-episode tree has a 'special' leaf.

        Returns None when the tree type already starts with 'episode' or
        when no unidentified leaf yields a 'special' guess.
        """
        if mtree.guess.get('type', '').startswith('episode'):
            return None
        for leaf in mtree.unidentified_leaves():
            found = self.container.find_properties(leaf.value, leaf, 'special')
            if self.container.as_guess(found):
                return {'type': 'episode'}
        return None

    def supported_properties(self):
        """Return whatever the property container reports as supported."""
        return self.container.get_supported_properties()

    def process(self, mtree, options=None):
        """Attach 'special' guesses to title leaves and unidentified leaves.

        Only acts on trees whose type starts with 'episode' and that either
        have no episode number or have season 0.  Always returns None.
        """
        if not mtree.guess.get('type', '').startswith('episode'):
            return None
        if mtree.info.get('episodeNumber') and mtree.info.get('season') != 0:
            return None

        def tag_leaves(leaves):
            for leaf in leaves:
                for guess in self.guess_special(leaf.value, leaf, options):
                    found_guess(leaf, guess, update_guess=False)

        # title leaves are handled first; the unidentified leaves are only
        # queried afterwards, as in the sequential loops this replaces
        tag_leaves(mtree.leaves_containing('title'))
        tag_leaves(mtree.unidentified_leaves())
        return None

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GuessIt - A library for guessing information from filenames # GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com> # Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# #
# GuessIt is free software; you can redistribute it and/or modify it under # GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by # the terms of the Lesser GNU General Public License as published by
@ -18,63 +18,49 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import unicode_literals
from guessit import Guess
from guessit.transfo import SingleNodeGuesser
from guessit.patterns import episode_rexps
import re
import logging
from guessit.plugins.transformers import Transformer log = logging.getLogger(__name__)
from guessit.matcher import GuessFinder
from guessit.patterns import sep def number_list(s):
from guessit.containers import PropertiesContainer, WeakValidator, NoValidator l = [ int(n) for n in re.sub('[^0-9]+', ' ', s).split() ]
from guessit.patterns.numeral import numeral, digital_numeral, parse_numeral
from re import split as re_split if len(l) == 2:
# it is an episode interval, return all numbers in between
return range(l[0], l[1]+1)
return l
def guess_episodes_rexps(string):
    """Guess season/episode information using the ``episode_rexps`` patterns.

    The patterns are tried in order (case-insensitively) and the first one
    that matches wins.

    Returns a ``(guess, span)`` pair, where ``span`` is the match span
    shifted by the pattern's ``span_adjust`` offsets, or ``(None, None)``
    when no pattern matches.
    """
    for rexp, confidence, span_adjust in episode_rexps:
        match = re.search(rexp, string, re.IGNORECASE)
        if not match:
            continue

        span = (match.start() + span_adjust[0],
                match.end() + span_adjust[1])
        raw = string[span[0]:span[1]]
        guess = Guess(match.groupdict(), confidence=confidence, raw=raw)

        # decide whether we have only a single episode number or an
        # episode list
        if guess.get('episodeNumber'):
            # number_list() returns a range for intervals; on Python 3 that
            # is a lazy range object, so materialise it before storing it
            eplist = list(number_list(guess['episodeNumber']))
            guess.set('episodeNumber', eplist[0], confidence=confidence, raw=raw)
            if len(eplist) > 1:
                guess.set('episodeList', eplist, confidence=confidence, raw=raw)

        if guess.get('bonusNumber'):
            eplist = list(number_list(guess['bonusNumber']))
            guess.set('bonusNumber', eplist[0], confidence=confidence, raw=raw)

        return guess, span

    return None, None
class GuessEpisodesRexps(Transformer): def process(mtree):
def __init__(self): SingleNodeGuesser(guess_episodes_rexps, None, log).process(mtree)
Transformer.__init__(self, 20)
self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)
def episode_parser(value):
    """Expand an episode expression such as 'e01e02' or 'x03-05' into numbers.

    ``value`` is split on ASCII letters; each remaining fragment is either
    a single number or a dash-separated chain whose consecutive bounds are
    expanded as inclusive intervals.

    Returns None when no number is found, the number itself when there is
    exactly one, and otherwise a dict mapping None to the first number and
    'episodeList' to all of them.
    """
    fragments = [x for x in re_split('[a-zA-Z]', value) if x]
    ret = []
    for letters_elt in fragments:
        dashed_values = [x for x in letters_elt.split('-') if x]
        if not dashed_values:
            # fragment made only of dashes: nothing to parse
            continue
        if len(dashed_values) == 1:
            # parse the dash-free token, so that a stray leading/trailing
            # dash (as in 'e01-e02') is not handed to parse_numeral
            ret.append(parse_numeral(dashed_values[0]))
            continue
        bounds = [parse_numeral(x) for x in dashed_values]
        for idx in range(len(bounds) - 1):
            # walk every consecutive pair of bounds; after the first pair
            # the lower bound was already emitted by the previous interval
            first = bounds[idx] if idx == 0 else bounds[idx] + 1
            ret.extend(range(first, bounds[idx + 1] + 1))
    if len(ret) > 1:
        return {None: ret[0], 'episodeList': ret}  # TODO: Should support seasonList also
    elif len(ret) > 0:
        return ret[0]
    else:
        return None
self.container.register_property(None, r'((?:season|saison)' + sep + '?(?P<season>' + numeral + '))', confidence=1.0, formatter=parse_numeral)
self.container.register_property(None, r'(s(?P<season>' + digital_numeral + ')[^0-9]?' + sep + '?(?P<episodeNumber>(?:e' + digital_numeral + '(?:' + sep + '?[e-]' + digital_numeral + ')*)))[^0-9]', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser}, validator=NoValidator())
self.container.register_property(None, r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<episodeNumber>(?:x' + digital_numeral + '(?:' + sep + '?[x-]' + digital_numeral + ')*)))[^0-9]', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser})
self.container.register_property(None, r'(s(?P<season>' + digital_numeral + '))[^0-9]', confidence=0.6, formatter=parse_numeral, validator=NoValidator())
self.container.register_property(None, r'((?P<episodeNumber>' + digital_numeral + ')v[23])', confidence=0.6, formatter=parse_numeral)
self.container.register_property(None, r'((?:ep)' + sep + r'(?P<episodeNumber>' + numeral + '))[^0-9]', confidence=0.7, formatter=parse_numeral)
self.container.register_property(None, r'(e(?P<episodeNumber>' + digital_numeral + '))', confidence=0.6, formatter=parse_numeral)
self.container.register_canonical_properties('other', 'FiNAL', 'Complete', validator=WeakValidator())
def supported_properties(self):
    """Return the property names declared here: 'episodeNumber' and 'season'."""
    properties = ['episodeNumber', 'season']
    return properties
def guess_episodes_rexps(self, string, node=None, options=None):
    """Look ``string`` up in the property container and return the guess.

    The properties found by ``self.container.find_properties`` are turned
    into the return value by ``self.container.as_guess``.  ``options`` is
    not used.
    """
    container = self.container
    return container.as_guess(container.find_properties(string, node), string)
def should_process(self, mtree, options=None):
    """Return whether the guessed 'type' of ``mtree`` starts with 'episode'.

    A missing type counts as the empty string.  ``options`` is not used.
    """
    guessed_type = mtree.guess.get('type', '')
    return guessed_type.startswith('episode')
def process(self, mtree, options=None):
    """Run ``self.guess_episodes_rexps`` over the unidentified leaves of ``mtree``.

    The work is delegated to a ``GuessFinder`` built from the guessing
    method, None, this transformer's logger and ``options``.
    """
    finder = GuessFinder(self.guess_episodes_rexps, None, self.log, options)
    leaves = mtree.unidentified_leaves()
    finder.process_nodes(leaves)

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GuessIt - A library for guessing information from filenames # GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com> # Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# #
# GuessIt is free software; you can redistribute it and/or modify it under # GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by # the terms of the Lesser GNU General Public License as published by
@ -18,196 +18,182 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import unicode_literals
from guessit import Guess
import mimetypes from guessit.patterns import (subtitle_exts, info_exts, video_exts, episode_rexps,
find_properties, compute_canonical_form)
from guessit.date import valid_year
from guessit.textutils import clean_string
import os.path import os.path
import re import re
import mimetypes
import logging
from guessit.guess import Guess log = logging.getLogger(__name__)
from guessit.patterns.extension import subtitle_exts, info_exts, video_exts
from guessit.transfo import TransformerException
from guessit.plugins.transformers import Transformer, get_transformer
from guessit.matcher import log_found_guess, found_guess
from guessit.textutils import clean_string
# List of well known movies and series, hardcoded because they cannot be
# guessed appropriately otherwise
MOVIES = [ 'OSS 117' ]
SERIES = [ 'Band of Brothers' ]
class GuessFiletype(Transformer): MOVIES = [ m.lower() for m in MOVIES ]
def __init__(self): SERIES = [ s.lower() for s in SERIES ]
Transformer.__init__(self, 250)
# List of well known movies and series, hardcoded because they cannot be def guess_filetype(mtree, filetype):
# guessed appropriately otherwise # put the filetype inside a dummy container to be able to have the
MOVIES = ['OSS 117'] # following functions work correctly as closures
SERIES = ['Band of Brothers'] # this is a workaround for python 2 which doesn't have the
# 'nonlocal' keyword (python 3 does have it)
filetype_container = [filetype]
other = {}
filename = mtree.string
MOVIES = [m.lower() for m in MOVIES] def upgrade_episode():
SERIES = [s.lower() for s in SERIES] if filetype_container[0] == 'video':
filetype_container[0] = 'episode'
elif filetype_container[0] == 'subtitle':
filetype_container[0] = 'episodesubtitle'
elif filetype_container[0] == 'info':
filetype_container[0] = 'episodeinfo'
def guess_filetype(self, mtree, options=None): def upgrade_movie():
options = options or {} if filetype_container[0] == 'video':
filetype_container[0] = 'movie'
elif filetype_container[0] == 'subtitle':
filetype_container[0] = 'moviesubtitle'
elif filetype_container[0] == 'info':
filetype_container[0] = 'movieinfo'
# put the filetype inside a dummy container to be able to have the def upgrade_subtitle():
# following functions work correctly as closures if 'movie' in filetype_container[0]:
# this is a workaround for python 2 which doesn't have the filetype_container[0] = 'moviesubtitle'
# 'nonlocal' keyword which we could use here in the upgrade_* functions elif 'episode' in filetype_container[0]:
# (python 3 does have it) filetype_container[0] = 'episodesubtitle'
filetype_container = [mtree.guess.get('type')]
other = {}
filename = mtree.string
def upgrade_episode():
if filetype_container[0] == 'subtitle':
filetype_container[0] = 'episodesubtitle'
elif filetype_container[0] == 'info':
filetype_container[0] = 'episodeinfo'
elif not filetype_container[0]:
filetype_container[0] = 'episode'
def upgrade_movie():
if filetype_container[0] == 'subtitle':
filetype_container[0] = 'moviesubtitle'
elif filetype_container[0] == 'info':
filetype_container[0] = 'movieinfo'
elif not filetype_container[0]:
filetype_container[0] = 'movie'
def upgrade_subtitle():
if filetype_container[0] == 'movie':
filetype_container[0] = 'moviesubtitle'
elif filetype_container[0] == 'episode':
filetype_container[0] = 'episodesubtitle'
elif not filetype_container[0]:
filetype_container[0] = 'subtitle'
def upgrade_info():
if filetype_container[0] == 'movie':
filetype_container[0] = 'movieinfo'
elif filetype_container[0] == 'episode':
filetype_container[0] = 'episodeinfo'
elif not filetype_container[0]:
filetype_container[0] = 'info'
# look at the extension first
fileext = os.path.splitext(filename)[1][1:].lower()
if fileext in subtitle_exts:
upgrade_subtitle()
other = {'container': fileext}
elif fileext in info_exts:
upgrade_info()
other = {'container': fileext}
elif fileext in video_exts:
other = {'container': fileext}
else: else:
if fileext and not options.get('name_only'): filetype_container[0] = 'subtitle'
other = {'extension': fileext}
# check whether we are in a 'Movies', 'Tv Shows', ... folder def upgrade_info():
folder_rexps = [ if 'movie' in filetype_container[0]:
(r'Movies?', upgrade_movie), filetype_container[0] = 'movieinfo'
(r'Films?', upgrade_movie), elif 'episode' in filetype_container[0]:
(r'Tv[ _-]?Shows?', upgrade_episode), filetype_container[0] = 'episodeinfo'
(r'Series?', upgrade_episode), else:
(r'Episodes?', upgrade_episode), filetype_container[0] = 'info'
]
for frexp, upgrade_func in folder_rexps:
frexp = re.compile(frexp, re.IGNORECASE)
for pathgroup in mtree.children:
if frexp.match(pathgroup.value):
upgrade_func()
return filetype_container[0], other
# check for a few specific cases which will unintentionally make the def upgrade(type='unknown'):
# following heuristics confused (eg: OSS 117 will look like an episode, if filetype_container[0] == 'autodetect':
# season 1, epnum 17, when it is in fact a movie) filetype_container[0] = type
fname = clean_string(filename).lower()
for m in self.MOVIES:
if m in fname:
self.log.debug('Found in exception list of movies -> type = movie')
upgrade_movie()
return filetype_container[0], other
for s in self.SERIES:
if s in fname:
self.log.debug('Found in exception list of series -> type = episode')
upgrade_episode()
return filetype_container[0], other
# now look whether there are some specific hints for episode vs movie
# if we have an episode_rexp (eg: s02e13), it is an episode
episode_transformer = get_transformer('guess_episodes_rexps')
if episode_transformer:
guess = episode_transformer.guess_episodes_rexps(filename)
if guess:
self.log.debug('Found guess_episodes_rexps: %s -> type = episode', guess)
upgrade_episode()
return filetype_container[0], other
properties_transformer = get_transformer('guess_properties') # look at the extension first
if properties_transformer: fileext = os.path.splitext(filename)[1][1:].lower()
# if we have certain properties characteristic of episodes, it is an ep if fileext in subtitle_exts:
found = properties_transformer.container.find_properties(filename, mtree, 'episodeFormat') upgrade_subtitle()
guess = properties_transformer.container.as_guess(found, filename) other = { 'container': fileext }
if guess: elif fileext in info_exts:
self.log.debug('Found characteristic property of episodes: %s"', guess) upgrade_info()
upgrade_episode() other = { 'container': fileext }
return filetype_container[0], other elif fileext in video_exts:
upgrade(type='video')
other = { 'container': fileext }
else:
upgrade(type='unknown')
other = { 'extension': fileext }
found = properties_transformer.container.find_properties(filename, mtree, 'format')
guess = properties_transformer.container.as_guess(found, filename)
if guess and guess['format'] in ('HDTV', 'WEBRip', 'WEB-DL', 'DVB'):
# Use weak episodes only if TV or WEB source
weak_episode_transformer = get_transformer('guess_weak_episodes_rexps')
if weak_episode_transformer:
guess = weak_episode_transformer.guess_weak_episodes_rexps(filename)
if guess:
self.log.debug('Found guess_weak_episodes_rexps: %s -> type = episode', guess)
upgrade_episode()
return filetype_container[0], other
website_transformer = get_transformer('guess_website')
if website_transformer:
found = website_transformer.container.find_properties(filename, mtree, 'website')
guess = website_transformer.container.as_guess(found, filename)
if guess:
for namepart in ('tv', 'serie', 'episode'):
if namepart in guess['website']:
# origin-specific type
self.log.debug('Found characteristic property of episodes: %s', guess)
upgrade_episode()
return filetype_container[0], other
if filetype_container[0] in ('subtitle', 'info') or (not filetype_container[0] and fileext in video_exts): # check whether we are in a 'Movies', 'Tv Shows', ... folder
# if no episode info found, assume it's a movie folder_rexps = [ (r'Movies?', upgrade_movie),
self.log.debug('Nothing characteristic found, assuming type = movie') (r'Tv[ _-]?Shows?', upgrade_episode),
(r'Series', upgrade_episode)
]
for frexp, upgrade_func in folder_rexps:
frexp = re.compile(frexp, re.IGNORECASE)
for pathgroup in mtree.children:
if frexp.match(pathgroup.value):
upgrade_func()
# check for a few specific cases which will unintentionally make the
# following heuristics confused (eg: OSS 117 will look like an episode,
# season 1, epnum 17, when it is in fact a movie)
fname = clean_string(filename).lower()
for m in MOVIES:
if m in fname:
log.debug('Found in exception list of movies -> type = movie')
upgrade_movie() upgrade_movie()
for s in SERIES:
if s in fname:
log.debug('Found in exception list of series -> type = episode')
upgrade_episode()
if not filetype_container[0]: # now look whether there are some specific hints for episode vs movie
self.log.debug('Nothing characteristic found, assuming type = unknown') if filetype_container[0] in ('video', 'subtitle', 'info'):
filetype_container[0] = 'unknown' # if we have an episode_rexp (eg: s02e13), it is an episode
for rexp, _, _ in episode_rexps:
match = re.search(rexp, filename, re.IGNORECASE)
if match:
log.debug('Found matching regexp: "%s" (string = "%s") -> type = episode', rexp, match.group())
upgrade_episode()
break
return filetype_container[0], other # if we have a 3-4 digit number that's not a year, maybe an episode
match = re.search(r'[^0-9]([0-9]{3,4})[^0-9]', filename)
if match:
fullnumber = int(match.group()[1:-1])
#season = fullnumber // 100
epnumber = fullnumber % 100
possible = True
def process(self, mtree, options=None): # check for validity
"""guess the file type now (will be useful later) if epnumber > 40:
""" possible = False
filetype, other = self.guess_filetype(mtree, options) if valid_year(fullnumber):
possible = False
mtree.guess.set('type', filetype, confidence=1.0) if possible:
log_found_guess(mtree.guess) log.debug('Found possible episode number: %s (from string "%s") -> type = episode', epnumber, match.group())
upgrade_episode()
filetype_info = Guess(other, confidence=1.0) # if we have certain properties characteristic of episodes, it is an ep
# guess the mimetype of the filename for prop, value, _, _ in find_properties(filename):
# TODO: handle other mimetypes not found on the default type_maps log.debug('prop: %s = %s' % (prop, value))
# mimetypes.types_map['.srt']='text/subtitle' if prop == 'episodeFormat':
mime, _ = mimetypes.guess_type(mtree.string, strict=False) log.debug('Found characteristic property of episodes: %s = "%s"', prop, value)
if mime is not None: upgrade_episode()
filetype_info.update({'mimetype': mime}, confidence=1.0) break
node_ext = mtree.node_at((-1,)) elif compute_canonical_form('format', value) == 'DVB':
found_guess(node_ext, filetype_info) log.debug('Found characteristic property of episodes: %s = "%s"', prop, value)
upgrade_episode()
break
if mtree.guess.get('type') in [None, 'unknown']: # origin-specific type
if options.get('name_only'): if 'tvu.org.ru' in filename:
mtree.guess.set('type', 'movie', confidence=0.6) log.debug('Found characteristic property of episodes: %s = "%s"', prop, value)
else: upgrade_episode()
raise TransformerException(__name__, 'Unknown file type')
# if no episode info found, assume it's a movie
log.debug('Nothing characteristic found, assuming type = movie')
upgrade_movie()
filetype = filetype_container[0]
return filetype, other
def process(mtree, filetype='autodetect'):
    """Guess the file type of ``mtree`` and record it on the tree.

    The type returned by ``guess_filetype`` is stored on the root guess
    with confidence 1.0.  The extra information it returns, plus the
    mimetype when ``mimetypes`` can determine one, becomes the guess of
    the node at index (-1,).
    """
    filetype, other = guess_filetype(mtree, filetype)

    mtree.guess.set('type', filetype, confidence=1.0)
    # let the logger do the formatting, so nothing is rendered when debug
    # output is disabled
    log.debug('Found with confidence %.2f: %s', 1.0, mtree.guess)

    filetype_info = Guess(other, confidence=1.0)
    # guess the mimetype of the filename
    # TODO: handle other mimetypes not found on the default type_maps
    # mimetypes.types_map['.srt']='text/subtitle'
    mime, _ = mimetypes.guess_type(mtree.string, strict=False)
    if mime is not None:
        filetype_info.update({'mimetype': mime}, confidence=1.0)

    node_ext = mtree.node_at((-1,))
    node_ext.guess = filetype_info
    log.debug('Found with confidence %.2f: %s', 1.0, node_ext.guess)

View file

@ -18,52 +18,54 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import unicode_literals
from guessit.transfo import SingleNodeGuesser
from guessit.plugins.transformers import Transformer from guessit.patterns import find_properties
from guessit.matcher import GuessFinder
import re import re
import logging
log = logging.getLogger(__name__)
class GuessIdnumber(Transformer): def guess_properties(string):
def __init__(self): try:
Transformer.__init__(self, -180) prop, value, pos, end = find_properties(string)[0]
return { prop: value }, (pos, end)
def supported_properties(self): except IndexError:
return ['idNumber']
_idnum = re.compile(r'(?P<idNumber>[a-zA-Z0-9-]{20,})') # 1.0, (0, 0))
def guess_idnumber(self, string, node=None, options=None):
match = self._idnum.search(string)
if match is not None:
result = match.groupdict()
switch_count = 0
DIGIT = 0
LETTER = 1
OTHER = 2
last = LETTER
for c in result['idNumber']:
if c in '0123456789':
ci = DIGIT
elif c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
ci = LETTER
else:
ci = OTHER
if ci != last:
switch_count += 1
last = ci
switch_ratio = float(switch_count) / len(result['idNumber'])
# only return the result as probable if we alternate often between
# char type (more likely for hash values than for common words)
if switch_ratio > 0.4:
return result, match.span()
return None, None return None, None
def process(self, mtree, options=None): _idnum = re.compile(r'(?P<idNumber>[a-zA-Z0-9-]{10,})') # 1.0, (0, 0))
GuessFinder(self.guess_idnumber, 0.4, self.log, options).process_nodes(mtree.unidentified_leaves())
def guess_idnumber(string):
    """Look for a hash-like identifier in ``string``.

    Returns ``(groupdict, span)`` for the first ``_idnum`` match when its
    characters switch often enough between digits, letters and other
    characters; otherwise returns ``(None, None)``.
    """
    found = _idnum.search(string)
    if found is None:
        return None, None

    result = found.groupdict()
    candidate = result['idNumber']

    DIGIT, LETTER, OTHER = 0, 1, 2

    def classify(char):
        if char in '0123456789':
            return DIGIT
        if char in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
            return LETTER
        return OTHER

    # count how many times the character class changes along the candidate;
    # the walk starts as if a letter had just been seen
    switch_count = 0
    previous = LETTER
    for current in map(classify, candidate):
        if current != previous:
            switch_count += 1
        previous = current

    # only return the result as probable if we alternate often between
    # char type (more likely for hash values than for common words)
    if float(switch_count) / len(candidate) > 0.4:
        return result, found.span()

    return None, None
def process(mtree):
    """Run ``guess_idnumber`` over ``mtree`` with a confidence of 0.4."""
    guesser = SingleNodeGuesser(guess_idnumber, 0.4, log)
    guesser.process(mtree)

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GuessIt - A library for guessing information from filenames # GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com> # Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# #
# GuessIt is free software; you can redistribute it and/or modify it under # GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by # the terms of the Lesser GNU General Public License as published by
@ -18,152 +18,38 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import unicode_literals
from guessit import Guess
from guessit.transfo import SingleNodeGuesser
from guessit.language import search_language
import logging
from guessit.language import search_language, subtitle_prefixes, subtitle_suffixes log = logging.getLogger(__name__)
from guessit.patterns.extension import subtitle_exts
from guessit.textutils import clean_string, find_words
from guessit.plugins.transformers import Transformer
from guessit.matcher import GuessFinder
class GuessLanguage(Transformer): def guess_language(string, node, skip=None):
def __init__(self): if skip:
Transformer.__init__(self, 30) relative_skip = []
for entry in skip:
node_idx = entry['node_idx']
span = entry['span']
if node_idx == node.node_idx[:len(node_idx)]:
relative_span = (span[0] - node.offset + 1, span[1] - node.offset + 1)
relative_skip.append(relative_span)
skip = relative_skip
def supported_properties(self): language, span, confidence = search_language(string, skip=skip)
return ['language', 'subtitleLanguage'] if language:
return (Guess({'language': language},
confidence=confidence,
raw= string[span[0]:span[1]]),
span)
def guess_language(self, string, node=None, options=None): return None, None
guess = search_language(string)
return guess
def _skip_language_on_second_pass(self, mtree, node): guess_language.use_node = True
"""Check if found node is a valid language node, or if it's a false positive.
:param mtree: Tree detected on first pass.
:type mtree: :class:`guessit.matchtree.MatchTree`
:param node: Node that contains a language Guess
:type node: :class:`guessit.matchtree.MatchTree`
:return: True if a second pass skipping this node is required def process(mtree, *args, **kwargs):
:rtype: bool SingleNodeGuesser(guess_language, None, log, *args, **kwargs).process(mtree)
""" # Note: 'language' is promoted to 'subtitleLanguage' in the post_process transfo
unidentified_starts = {}
unidentified_ends = {}
property_starts = {}
property_ends = {}
title_starts = {}
title_ends = {}
for unidentified_node in mtree.unidentified_leaves():
unidentified_starts[unidentified_node.span[0]] = unidentified_node
unidentified_ends[unidentified_node.span[1]] = unidentified_node
for property_node in mtree.leaves_containing('year'):
property_starts[property_node.span[0]] = property_node
property_ends[property_node.span[1]] = property_node
for title_node in mtree.leaves_containing(['title', 'series']):
title_starts[title_node.span[0]] = title_node
title_ends[title_node.span[1]] = title_node
return node.span[0] in title_ends.keys() and (node.span[1] in unidentified_starts.keys() or node.span[1] + 1 in property_starts.keys()) or\
node.span[1] in title_starts.keys() and (node.span[0] == 0 or node.span[0] in unidentified_ends.keys() or node.span[0] in property_ends.keys())
def second_pass_options(self, mtree, options=None):
    """Work out which language nodes a second pass should skip.

    A language node is skipped when ``_skip_language_on_second_pass`` flags
    it (unless it looks like the subtitle language of a subtitle file), or
    when the same language was already found on another node with at least
    the same confidence.

    Returns ``{'skip_nodes': [...]}`` when there is something to skip,
    ``None`` otherwise.
    """
    m = mtree.matched()
    to_skip_language_nodes = []

    for lang_key in ('language', 'subtitleLanguage'):
        langs = {}
        lang_nodes = set(mtree.leaves_containing(lang_key))

        for lang_node in lang_nodes:
            lang = lang_node.guess.get(lang_key, None)
            if self._skip_language_on_second_pass(mtree, lang_node):
                # Language probably split the title. Add to skip for 2nd pass.

                # if filetype is subtitle and the language appears last, just before
                # the extension, then it is likely a subtitle language
                parts = clean_string(lang_node.root.value).split()
                # Membership is checked first: the raw node value is not
                # guaranteed to survive cleaning as a standalone word, and
                # list.index() raises ValueError for a missing item.
                if (m.get('type') in ['moviesubtitle', 'episodesubtitle'] and
                        lang_node.value in parts and
                        parts.index(lang_node.value) == len(parts) - 2):
                    continue

                to_skip_language_nodes.append(lang_node)
            elif lang not in langs:
                langs[lang] = lang_node
            else:
                # The same language was found. Keep the more confident one,
                # and add others to skip for 2nd pass.
                existing_lang_node = langs[lang]
                # NOTE(review): confidence is read for 'language' even when
                # lang_key is 'subtitleLanguage' - confirm this is intended.
                if (existing_lang_node.guess.confidence('language') >=
                        lang_node.guess.confidence('language')):
                    # lang_node is to remove
                    to_skip = lang_node
                else:
                    # existing_lang_node is to remove
                    langs[lang] = lang_node
                    to_skip = existing_lang_node
                to_skip_language_nodes.append(to_skip)

    if to_skip_language_nodes:
        return {'skip_nodes': to_skip_language_nodes}
    return None
def should_process(self, mtree, options=None):
    """Return True unless the ``nolanguage`` option is present."""
    if not options:
        # None or an empty container cannot hold the opt-out key
        return True
    return 'nolanguage' not in options
def process(self, mtree, options=None):
    """Run ``guess_language`` over the unidentified leaves of ``mtree``."""
    finder = GuessFinder(self.guess_language, None, self.log, options)
    finder.process_nodes(mtree.unidentified_leaves())
def promote_subtitle(self, node):
    """Turn the node's ``language`` guess into a ``subtitleLanguage`` guess.

    The value and its confidence are copied to ``subtitleLanguage`` and the
    ``language`` entry is removed. Calling this on a node that has no
    ``language`` entry (for instance one that was already promoted) is a
    no-op, so callers may apply several promotion rules to the same node.
    """
    guess = node.guess
    if 'language' not in guess:
        # already promoted (or never a language node): nothing to move
        return

    guess.set('subtitleLanguage', guess['language'],
              confidence=guess.confidence('language'))
    del guess['language']
def post_process(self, mtree, options=None):
    """Promote ``language`` guesses to ``subtitleLanguage`` where it makes sense.

    Each node carrying a ``language`` guess is checked against the rules
    below, in order. A node is promoted at most once: promotion removes its
    ``language`` entry, so no further rule is applied to it afterwards.
    """
    # 1- try to promote language to subtitle language where it makes sense
    for node in mtree.nodes():
        if 'language' not in node.guess:
            continue

        # - if we matched a language in a file with a sub extension and that
        #   the group is the last group of the filename, it is probably the
        #   language of the subtitle
        #   (eg: 'xxx.english.srt')
        if (mtree.node_at((-1,)).value.lower() in subtitle_exts and
                node == mtree.leaves()[-2]):
            self.promote_subtitle(node)
            continue

        # - if we find in the same explicit group
        #   a subtitle prefix before the language,
        #   or a subtitle suffix after the language,
        #   then upgrade the language
        explicit_group = mtree.node_at(node.node_idx[:2])
        group_str = explicit_group.value.lower()
        group_words = find_words(group_str)
        # position of the language relative to the start of its group
        lang_offset = node.span[0] - explicit_group.span[0]

        prefix_before = any(sub_prefix in group_words and
                            0 <= group_str.find(sub_prefix) < lang_offset
                            for sub_prefix in subtitle_prefixes)
        suffix_after = any(sub_suffix in group_words and
                           lang_offset < group_str.find(sub_suffix)
                           for sub_suffix in subtitle_suffixes)
        if prefix_before or suffix_after:
            self.promote_subtitle(node)
            continue

        # - if a language is in an explicit group just preceded by "st",
        #   it is a subtitle language (eg: '...st[fr-eng]...')
        try:
            idx = node.node_idx
            previous = mtree.node_at((idx[0], idx[1] - 1)).leaves()[-1]
            if previous.value.lower()[-2:] == 'st':
                self.promote_subtitle(node)
        except IndexError:
            # no usable previous group: this rule simply does not apply
            pass

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GuessIt - A library for guessing information from filenames # GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com> # Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# #
# GuessIt is free software; you can redistribute it and/or modify it under # GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by # the terms of the Lesser GNU General Public License as published by
@ -18,160 +18,157 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import unicode_literals
from guessit import Guess
import unicodedata
import logging
from guessit.plugins.transformers import Transformer log = logging.getLogger(__name__)
from guessit.matcher import found_property
from guessit import u
class GuessMovieTitleFromPosition(Transformer): def process(mtree):
def __init__(self): def found_property(node, name, value, confidence):
Transformer.__init__(self, -200) node.guess = Guess({ name: value },
confidence=confidence,
raw=value)
log.debug('Found with confidence %.2f: %s' % (confidence, node.guess))
def supported_properties(self): def found_title(node, confidence):
return ['title'] found_property(node, 'title', node.clean_value, confidence)
def should_process(self, mtree, options=None): basename = mtree.node_at((-2,))
options = options or {} all_valid = lambda leaf: len(leaf.clean_value) > 0
return not options.get('skip_title') and not mtree.guess.get('type', '').startswith('episode') basename_leftover = basename.unidentified_leaves(valid=all_valid)
def process(self, mtree, options=None): try:
""" folder = mtree.node_at((-3,))
try to identify the remaining unknown groups by looking at their folder_leftover = folder.unidentified_leaves()
position relative to other known elements except ValueError:
""" folder = None
basename = mtree.node_at((-2,)) folder_leftover = []
all_valid = lambda leaf: len(leaf.clean_value) > 0
basename_leftover = basename.unidentified_leaves(valid=all_valid)
try: log.debug('folder: %s' % folder_leftover)
log.debug('basename: %s' % basename_leftover)
# specific cases:
# if we find the same group both in the folder name and the filename,
# it's a good candidate for title
if (folder_leftover and basename_leftover and
folder_leftover[0].clean_value == basename_leftover[0].clean_value):
found_title(folder_leftover[0], confidence=0.8)
return
# specific cases:
# if the basename contains a number first followed by an unidentified
# group, and the folder only contains 1 unidentified one, then we have
# a series
# ex: Millenium Trilogy (2009)/(1)The Girl With The Dragon Tattoo(2009).mkv
try:
series = folder_leftover[0]
filmNumber = basename_leftover[0]
title = basename_leftover[1]
basename_leaves = basename.leaves()
num = int(filmNumber.clean_value)
log.debug('series: %s' % series.clean_value)
log.debug('title: %s' % title.clean_value)
if (series.clean_value != title.clean_value and
series.clean_value != filmNumber.clean_value and
basename_leaves.index(filmNumber) == 0 and
basename_leaves.index(title) == 1):
found_title(title, confidence=0.6)
found_property(series, 'filmSeries',
series.clean_value, confidence=0.6)
found_property(filmNumber, 'filmNumber',
num, confidence=0.6)
return
except Exception:
pass
# specific cases:
# - movies/tttttt (yyyy)/tttttt.ccc
try:
if mtree.node_at((-4, 0)).value.lower() == 'movies':
folder = mtree.node_at((-3,)) folder = mtree.node_at((-3,))
folder_leftover = folder.unidentified_leaves()
except ValueError:
folder = None
folder_leftover = []
self.log.debug('folder: %s' % u(folder_leftover)) # Note:too generic, might solve all the unittests as they all
self.log.debug('basename: %s' % u(basename_leftover)) # contain 'movies' in their path
#
#if containing_folder.is_leaf() and not containing_folder.guess:
# containing_folder.guess =
# Guess({ 'title': clean_string(containing_folder.value) },
# confidence=0.7)
# specific cases: year_group = folder.first_leaf_containing('year')
# if we find the same group both in the folder name and the filename, groups_before = folder.previous_unidentified_leaves(year_group)
# it's a good candidate for title
if (folder_leftover and basename_leftover and
folder_leftover[0].clean_value == basename_leftover[0].clean_value):
found_property(folder_leftover[0], 'title', confidence=0.8) found_title(groups_before[0], confidence=0.8)
return return
# specific cases: except Exception:
# if the basename contains a number first followed by an unidentified pass
# group, and the folder only contains 1 unidentified one, then we have
# a series
# ex: Millenium Trilogy (2009)/(1)The Girl With The Dragon Tattoo(2009).mkv
try:
series = folder_leftover[0]
filmNumber = basename_leftover[0]
title = basename_leftover[1]
basename_leaves = basename.leaves() # if we have either format or videoCodec in the folder containing the file
# or one of its parents, then we should probably look for the title in
# there rather than in the basename
try:
props = mtree.previous_leaves_containing(mtree.children[-2],
[ 'videoCodec', 'format',
'language' ])
except IndexError:
props = []
num = int(filmNumber.clean_value) if props:
group_idx = props[0].node_idx[0]
if all(g.node_idx[0] == group_idx for g in props):
# if they're all in the same group, take leftover info from there
leftover = mtree.node_at((group_idx,)).unidentified_leaves()
self.log.debug('series: %s' % series.clean_value) if leftover:
self.log.debug('title: %s' % title.clean_value) found_title(leftover[0], confidence=0.7)
if (series.clean_value != title.clean_value and
series.clean_value != filmNumber.clean_value and
basename_leaves.index(filmNumber) == 0 and
basename_leaves.index(title) == 1):
found_property(title, 'title', confidence=0.6)
found_property(series, 'filmSeries', confidence=0.6)
found_property(filmNumber, 'filmNumber', num, confidence=0.6)
return
except Exception:
pass
# specific cases:
# - movies/tttttt (yyyy)/tttttt.ccc
try:
if mtree.node_at((-4, 0)).value.lower() == 'movies':
folder = mtree.node_at((-3,))
# Note:too generic, might solve all the unittests as they all
# contain 'movies' in their path
#
# if containing_folder.is_leaf() and not containing_folder.guess:
# containing_folder.guess =
# Guess({ 'title': clean_string(containing_folder.value) },
# confidence=0.7)
year_group = folder.first_leaf_containing('year')
groups_before = folder.previous_unidentified_leaves(year_group)
found_property(groups_before[0], 'title', confidence=0.8)
return return
except Exception: # look for title in basename if there are some remaining undidentified
pass # groups there
if basename_leftover:
title_candidate = basename_leftover[0]
# if we have either format or videoCodec in the folder containing the file # if basename is only one word and the containing folder has at least
# or one of its parents, then we should probably look for the title in # 3 words in it, we should take the title from the folder name
# there rather than in the basename # ex: Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi
try: # ex: Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi <-- TODO: gets caught here?
props = mtree.previous_leaves_containing(mtree.children[-2], if (title_candidate.clean_value.count(' ') == 0 and
['videoCodec', 'format', folder_leftover and
'language']) folder_leftover[0].clean_value.count(' ') >= 2):
except IndexError:
props = []
if props: found_title(folder_leftover[0], confidence=0.7)
group_idx = props[0].node_idx[0]
if all(g.node_idx[0] == group_idx for g in props):
# if they're all in the same group, take leftover info from there
leftover = mtree.node_at((group_idx,)).unidentified_leaves()
if leftover:
found_property(leftover[0], 'title', confidence=0.7)
return
# look for title in basename if there are some remaining unidentified
# groups there
if basename_leftover:
# if basename is only one word and the containing folder has at least
# 3 words in it, we should take the title from the folder name
# ex: Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi
# ex: Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi <-- TODO: gets caught here?
if (basename_leftover[0].clean_value.count(' ') == 0 and
folder_leftover and
folder_leftover[0].clean_value.count(' ') >= 2):
found_property(folder_leftover[0], 'title', confidence=0.7)
return
# if there are only many unidentified groups, take the first of which is
# not inside brackets or parentheses.
# ex: Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi
if basename_leftover[0].is_explicit():
for basename_leftover_elt in basename_leftover:
if not basename_leftover_elt.is_explicit():
found_property(basename_leftover_elt, 'title', confidence=0.8)
return
# if all else fails, take the first remaining unidentified group in the
# basename as title
found_property(basename_leftover[0], 'title', confidence=0.6)
return return
# if there are no leftover groups in the basename, look in the folder name # if there are only 2 unidentified groups, the first of which is inside
if folder_leftover: # brackets or parentheses, we take the second one for the title:
found_property(folder_leftover[0], 'title', confidence=0.5) # ex: Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi
if len(basename_leftover) == 2 and basename_leftover[0].is_explicit():
found_title(basename_leftover[1], confidence=0.8)
return return
# if nothing worked, look if we have a very small group at the beginning # if all else fails, take the first remaining unidentified group in the
# of the basename # basename as title
basename = mtree.node_at((-2,)) found_title(title_candidate, confidence=0.6)
basename_leftover = basename.unidentified_leaves(valid=lambda leaf: True) return
if basename_leftover:
found_property(basename_leftover[0], 'title', confidence=0.4) # if there are no leftover groups in the basename, look in the folder name
return if folder_leftover:
found_title(folder_leftover[0], confidence=0.5)
return
# if nothing worked, look if we have a very small group at the beginning
# of the basename
basename = mtree.node_at((-2,))
basename_leftover = basename.unidentified_leaves(valid=lambda leaf: True)
if basename_leftover:
found_title(basename_leftover[0], confidence=0.4)
return

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GuessIt - A library for guessing information from filenames # GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com> # Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# #
# GuessIt is free software; you can redistribute it and/or modify it under # GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by # the terms of the Lesser GNU General Public License as published by
@ -18,213 +18,21 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import unicode_literals
from guessit.transfo import SingleNodeGuesser
from guessit.patterns import find_properties
import logging
from guessit.containers import PropertiesContainer, WeakValidator, LeavesValidator, QualitiesContainer log = logging.getLogger(__name__)
from guessit.patterns.extension import subtitle_exts, video_exts, info_exts
from guessit.plugins.transformers import Transformer
from guessit.matcher import GuessFinder
class GuessProperties(Transformer): def guess_properties(string):
def __init__(self): try:
Transformer.__init__(self, 35) prop, value, pos, end = find_properties(string)[0]
return { prop: value }, (pos, end)
except IndexError:
return None, None
self.container = PropertiesContainer()
self.qualities = QualitiesContainer()
def register_property(propname, props): def process(mtree):
"""props a dict of {value: [patterns]}""" SingleNodeGuesser(guess_properties, 1.0, log).process(mtree)
for canonical_form, patterns in props.items():
if isinstance(patterns, tuple):
patterns2, kwargs = patterns
kwargs = dict(kwargs)
kwargs['canonical_form'] = canonical_form
self.container.register_property(propname, *patterns2, **kwargs)
else:
self.container.register_property(propname, *patterns, canonical_form=canonical_form)
def register_quality(propname, quality_dict):
"""props a dict of {canonical_form: quality}"""
for canonical_form, quality in quality_dict.items():
self.qualities.register_quality(propname, canonical_form, quality)
register_property('container', {'mp4': ['MP4']})
# http://en.wikipedia.org/wiki/Pirated_movie_release_types
register_property('format', {'VHS': ['VHS'],
                             'Cam': ['CAM', 'CAMRip'],
                             'Telesync': ['TELESYNC', 'PDVD'],
                             'Workprint': ['WORKPRINT', 'WP'],
                             'Telecine': ['TELECINE', 'TC'],
                             'PPV': ['PPV', 'PPV-Rip'],  # Pay Per View
                             'TV': ['SD-TV', 'SD-TV-Rip', 'Rip-SD-TV', 'TV-Rip', 'Rip-TV'],
                             'DVB': ['DVB-Rip', 'DVB', 'PD-TV'],
                             'DVD': ['DVD', 'DVD-Rip', 'VIDEO-TS'],
                             'HDTV': ['HD-TV', 'TV-RIP-HD', 'HD-TV-RIP'],
                             'VOD': ['VOD', 'VOD-Rip'],
                             'WEBRip': ['WEB-Rip'],
                             'WEB-DL': ['WEB-DL'],
                             'HD-DVD': ['HD-(?:DVD)?-Rip', 'HD-DVD'],
                             'BluRay': ['Blu-ray', 'B[DR]', 'B[DR]-Rip', 'BD[59]', 'BD25', 'BD50']
                             })
# The low-confidence 'TS' alias is registered in its own call: a dict literal
# keeps only the last value of a repeated key, so listing 'Telesync' twice in
# the dict above would silently drop the 'TELESYNC' / 'PDVD' patterns.
register_property('format', {'Telesync': (['TS'], {'confidence': 0.2})})
register_quality('format', {'VHS': -100,
'Cam': -90,
'Telesync': -80,
'Workprint': -70,
'Telecine': -60,
'PPV': -50,
'TV': -30,
'DVB': -20,
'DVD': 0,
'HDTV': 20,
'VOD': 40,
'WEBRip': 50,
'WEB-DL': 60,
'HD-DVD': 80,
'BluRay': 100
})
# NOTE(review): in these non-raw strings '\\|' reaches the regex engine as an
# escaped literal '|', so the first alternative appears to match the two
# characters '|/' rather than "backslash or slash" - confirm the intent before
# touching the patterns; they are kept unchanged here.
register_property('screenSize', {'360p': ['(?:\d{3,}(?:\\|\/|x|\*))?360(?:i|p?x?)'],
                                 '368p': ['(?:\d{3,}(?:\\|\/|x|\*))?368(?:i|p?x?)'],
                                 '480p': ['(?:\d{3,}(?:\\|\/|x|\*))?480(?:i|p?x?)'],
                                 '576p': ['(?:\d{3,}(?:\\|\/|x|\*))?576(?:i|p?x?)'],
                                 '720p': ['(?:\d{3,}(?:\\|\/|x|\*))?720(?:i|p?x?)'],
                                 '900p': ['(?:\d{3,}(?:\\|\/|x|\*))?900(?:i|p?x?)'],
                                 '1080i': ['(?:\d{3,}(?:\\|\/|x|\*))?1080i'],
                                 '1080p': ['(?:\d{3,}(?:\\|\/|x|\*))?1080(?:p?x?)'],
                                 '4K': ['(?:\d{3,}(?:\\|\/|x|\*))?2160(?:i|p?x?)']
                                 })
# The low-confidence 'hr' alias is registered in its own call: a dict literal
# keeps only the last value of a repeated key, so listing '480p' twice in the
# dict above would silently drop the 480 resolution pattern.
register_property('screenSize', {'480p': (['hr'], {'confidence': 0.2})})
register_quality('screenSize', {'360p': -300,
'368p': -200,
'480p': -100,
'576p': 0,
'720p': 100,
'900p': 130,
'1080i': 180,
'1080p': 200,
'4K': 400
})
_videoCodecProperty = {'Real': ['Rv\d{2}'], # http://en.wikipedia.org/wiki/RealVideo
'Mpeg2': ['Mpeg2'],
'DivX': ['DVDivX', 'DivX'],
'XviD': ['XviD'],
'h264': ['[hx]-264(?:-AVC)?', 'MPEG-4(?:-AVC)'],
'h265': ['[hx]-265(?:-HEVC)?', 'HEVC']
}
register_property('videoCodec', _videoCodecProperty)
register_quality('videoCodec', {'Real': -50,
'Mpeg2': -30,
'DivX': -10,
'XviD': 0,
'h264': 100,
'h265': 150
})
# http://blog.mediacoderhq.com/h264-profiles-and-levels/
# http://fr.wikipedia.org/wiki/H.264
self.container.register_property('videoProfile', 'BP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
self.container.register_property('videoProfile', 'XP', 'EP', canonical_form='XP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
self.container.register_property('videoProfile', 'MP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
self.container.register_property('videoProfile', 'HP', 'HiP', canonical_form='HP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
self.container.register_property('videoProfile', '10.?bit', 'Hi10P', canonical_form='10bit', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
self.container.register_property('videoProfile', 'Hi422P', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
self.container.register_property('videoProfile', 'Hi444PP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
register_quality('videoProfile', {'BP': -20,
'XP': -10,
'MP': 0,
'HP': 10,
'10bit': 15,
'Hi422P': 25,
'Hi444PP': 35
})
# has nothing to do here (or on filenames for that matter), but some
# releases use it and it helps to identify release groups, so we adapt
register_property('videoApi', {'DXVA': ['DXVA']})
register_property('audioCodec', {'MP3': ['MP3'],
'DolbyDigital': ['DD'],
'AAC': ['AAC'],
'AC3': ['AC3'],
'Flac': ['FLAC'],
'DTS': ['DTS'],
'TrueHD': ['True-HD']
})
register_quality('audioCodec', {'MP3': 10,
'DolbyDigital': 30,
'AAC': 35,
'AC3': 40,
'Flac': 45,
'DTS': 60,
'TrueHD': 70
})
self.container.register_property('audioProfile', 'HD', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'DTS']))
self.container.register_property('audioProfile', 'HD-MA', canonical_form='HDMA', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'DTS']))
self.container.register_property('audioProfile', 'HE', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'AAC']))
self.container.register_property('audioProfile', 'LC', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'AAC']))
self.container.register_property('audioProfile', 'HQ', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'AC3']))
register_quality('audioProfile', {'HD': 20,
'HDMA': 50,
'LC': 0,
'HQ': 0,
'HE': 20
})
register_property('audioChannels', {'7.1': ['7[\W_]1', '7ch'],
'5.1': ['5[\W_]1', '5ch'],
'2.0': ['2[\W_]0', '2ch', 'stereo'],
'1.0': ['1[\W_]0', '1ch', 'mono']
})
register_quality('audioChannels', {'7.1': 200,
'5.1': 100,
'2.0': 0,
'1.0': -100
})
self.container.register_property('episodeFormat', r'Minisodes?', canonical_form='Minisode')
register_property('other', {'AudioFix': ['Audio-Fix', 'Audio-Fixed'],
'SyncFix': ['Sync-Fix', 'Sync-Fixed'],
'DualAudio': ['Dual-Audio'],
'WideScreen': ['ws', 'wide-screen'],
})
self.container.register_property('other', 'Real', 'Fix', canonical_form="Proper", validator=WeakValidator())
self.container.register_property('other', 'Proper', 'Repack', 'Rerip', canonical_form="Proper")
self.container.register_canonical_properties('other', 'R5', 'Screener', '3D', 'HD', 'HQ', 'DDC')
self.container.register_canonical_properties('other', 'Limited', 'Complete', 'Classic', 'Unrated', 'LiNE', 'Bonus', 'Trailer', validator=WeakValidator())
for prop in self.container.get_properties('format'):
self.container.register_property('other', prop.pattern + '(-?Scr(?:eener)?)', canonical_form='Screener')
for exts in (subtitle_exts, info_exts, video_exts):
for container in exts:
self.container.register_property('container', container, confidence=0.3)
def guess_properties(self, string, node=None, options=None):
    """Find registered properties in ``string`` and return them as a guess.

    The lookup and the conversion are both delegated to ``self.container``.
    """
    matches = self.container.find_properties(string, node)
    guess = self.container.as_guess(matches, string)
    return guess
def supported_properties(self):
    """Return the property names reported by the properties container."""
    supported = self.container.get_supported_properties()
    return supported
def process(self, mtree, options=None):
    """Run ``guess_properties`` over the unidentified leaves of ``mtree``."""
    finder = GuessFinder(self.guess_properties, 1.0, self.log, options)
    finder.process_nodes(mtree.unidentified_leaves())
def rate_quality(self, guess, *props):
    """Delegate the quality rating of ``guess`` to the qualities container."""
    rating = self.qualities.rate_quality(guess, *props)
    return rating

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GuessIt - A library for guessing information from filenames # GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com> # Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# #
# GuessIt is free software; you can redistribute it and/or modify it under # GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by # the terms of the Lesser GNU General Public License as published by
@ -18,132 +18,69 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import unicode_literals
from guessit.transfo import SingleNodeGuesser
from guessit.patterns import prop_multi, compute_canonical_form, _dash, _psep
import re
import logging
from guessit.plugins.transformers import Transformer log = logging.getLogger(__name__)
from guessit.matcher import GuessFinder, found_property, found_guess
from guessit.containers import PropertiesContainer def get_patterns(property_name):
from guessit.patterns import sep return [ p.replace(_dash, _psep) for patterns in prop_multi[property_name].values() for p in patterns ]
from guessit.guess import Guess
from guessit.textutils import strip_brackets CODECS = get_patterns('videoCodec')
FORMATS = get_patterns('format')
VAPIS = get_patterns('videoApi')
# RG names following a codec or format, with a potential space or dash inside the name
GROUP_NAMES = [ r'(?P<videoCodec>' + codec + r')[ \.-](?P<releaseGroup>.+?([- \.].*?)??)[ \.]'
for codec in CODECS ]
GROUP_NAMES += [ r'(?P<format>' + fmt + r')[ \.-](?P<releaseGroup>.+?([- \.].*?)??)[ \.]'
for fmt in FORMATS ]
GROUP_NAMES += [ r'(?P<videoApi>' + api + r')[ \.-](?P<releaseGroup>.+?([- \.].*?)??)[ \.]'
for api in VAPIS ]
GROUP_NAMES2 = [ r'\.(?P<videoCodec>' + codec + r')-(?P<releaseGroup>.*?)(-(.*?))?[ \.]'
for codec in CODECS ]
GROUP_NAMES2 += [ r'\.(?P<format>' + fmt + r')-(?P<releaseGroup>.*?)(-(.*?))?[ \.]'
for fmt in FORMATS ]
GROUP_NAMES2 += [ r'\.(?P<videoApi>' + vapi + r')-(?P<releaseGroup>.*?)(-(.*?))?[ \.]'
for vapi in VAPIS ]
GROUP_NAMES = [ re.compile(r, re.IGNORECASE) for r in GROUP_NAMES ]
GROUP_NAMES2 = [ re.compile(r, re.IGNORECASE) for r in GROUP_NAMES2 ]
def adjust_metadata(md):
return dict((property_name, compute_canonical_form(property_name, value) or value)
for property_name, value in md.items())
class GuessReleaseGroup(Transformer): def guess_release_group(string):
def __init__(self): # first try to see whether we have both a known codec and a known release group
Transformer.__init__(self, -190) for rexp in GROUP_NAMES:
self.container = PropertiesContainer(canonical_from_pattern=False) match = rexp.search(string)
self._allowed_groupname_pattern = '[\w@#€£$&]' while match:
self._forbidden_groupname_lambda = [lambda elt: elt in ['rip', 'by', 'for', 'par', 'pour', 'bonus'], metadata = match.groupdict()
lambda elt: self._is_number(elt), # make sure this is an actual release group we caught
] release_group = (compute_canonical_form('releaseGroup', metadata['releaseGroup']) or
# If the previous property in this list, the match will be considered as safe compute_canonical_form('weakReleaseGroup', metadata['releaseGroup']))
# and group name can contain a separator. if release_group:
self.previous_safe_properties = ['videoCodec', 'format', 'videoApi', 'audioCodec', 'audioProfile', 'videoProfile', 'audioChannels'] return adjust_metadata(metadata), (match.start(1), match.end(2))
self.container.sep_replace_char = '-' # we didn't find anything conclusive, keep searching
self.container.canonical_from_pattern = False match = rexp.search(string, match.span()[0]+1)
self.container.enhance = True
self.container.register_property('releaseGroup', self._allowed_groupname_pattern + '+')
self.container.register_property('releaseGroup', self._allowed_groupname_pattern + '+-' + self._allowed_groupname_pattern + '+')
def supported_properties(self): # pick anything as releaseGroup as long as we have a codec in front
return self.container.get_supported_properties() # this doesn't include a potential dash ('-') ending the release group
# eg: [...].X264-HiS@SiLUHD-English.[...]
for rexp in GROUP_NAMES2:
match = rexp.search(string)
if match:
return adjust_metadata(match.groupdict()), (match.start(1), match.end(2))
def _is_number(self, s): return None, None
try:
int(s)
return True
except ValueError:
return False
def validate_group_name(self, guess):
val = guess['releaseGroup']
if len(val) >= 2:
if '-' in val: def process(mtree):
checked_val = "" SingleNodeGuesser(guess_release_group, 0.8, log).process(mtree)
for elt in val.split('-'):
forbidden = False
for forbidden_lambda in self._forbidden_groupname_lambda:
forbidden = forbidden_lambda(elt.lower())
if forbidden:
break
if not forbidden:
if checked_val:
checked_val += '-'
checked_val += elt
else:
break
val = checked_val
if not val:
return False
guess['releaseGroup'] = val
forbidden = False
for forbidden_lambda in self._forbidden_groupname_lambda:
forbidden = forbidden_lambda(val.lower())
if forbidden:
break
if not forbidden:
return True
return False
def is_leaf_previous(self, leaf, node):
    """Tell whether `leaf` ends before `node` with only separators between.

    Returns True when the end of `leaf.span` is not after the start of
    `node.span` and every character of `leaf.root.value` lying between the
    two spans belongs to `sep`; returns False otherwise. Adjacent spans
    (empty gap) count as "previous".
    """
    gap_start = leaf.span[1]
    gap_end = node.span[0]
    if gap_start > gap_end:
        # the leaf overlaps or follows the node
        return False
    # any non-separator character in the gap disqualifies the leaf
    return all(leaf.root.value[pos] in sep
               for pos in range(gap_start, gap_end))
def guess_release_group(self, string, node=None, options=None):
found = self.container.find_properties(string, node, 'releaseGroup')
guess = self.container.as_guess(found, string, self.validate_group_name, sep_replacement='-')
validated_guess = None
if guess:
explicit_group_node = node.group_node()
if explicit_group_node:
for leaf in explicit_group_node.leaves_containing(self.previous_safe_properties):
if self.is_leaf_previous(leaf, node):
if leaf.root.value[leaf.span[1]] == '-':
guess.metadata().confidence = 1
else:
guess.metadata().confidence = 0.7
validated_guess = guess
if not validated_guess:
# If previous group last leaf is identified as a safe property,
# consider the raw value as a releaseGroup
previous_group_node = node.previous_group_node()
if previous_group_node:
for leaf in previous_group_node.leaves_containing(self.previous_safe_properties):
if self.is_leaf_previous(leaf, node):
guess = Guess({'releaseGroup': node.value}, confidence=1, input=node.value, span=(0, len(node.value)))
if self.validate_group_name(guess):
node.guess = guess
validated_guess = guess
if validated_guess:
# If following group nodes have only one unidentified leaf, it belongs to the release group
next_group_node = node
while True:
next_group_node = next_group_node.next_group_node()
if next_group_node:
leaves = next_group_node.leaves()
if len(leaves) == 1 and not leaves[0].guess:
validated_guess['releaseGroup'] = validated_guess['releaseGroup'] + leaves[0].value
leaves[0].guess = validated_guess
else:
break
else:
break
if validated_guess:
# Strip brackets
validated_guess['releaseGroup'] = strip_brackets(validated_guess['releaseGroup'])
return validated_guess
def process(self, mtree, options=None):
GuessFinder(self.guess_release_group, None, self.log, options).process_nodes(mtree.unidentified_leaves())

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GuessIt - A library for guessing information from filenames # GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com> # Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# #
# GuessIt is free software; you can redistribute it and/or modify it under # GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by # the terms of the Lesser GNU General Public License as published by
@ -18,41 +18,33 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, \ from __future__ import unicode_literals
unicode_literals from guessit import Guess
from guessit.transfo import SingleNodeGuesser
from guessit.patterns import video_rexps, sep
import re
import logging
from guessit.patterns import _psep log = logging.getLogger(__name__)
from guessit.containers import PropertiesContainer
from guessit.plugins.transformers import Transformer
from guessit.matcher import GuessFinder
from guessit.patterns.numeral import parse_numeral
class GuessVideoRexps(Transformer): def guess_video_rexps(string):
def __init__(self): string = '-' + string + '-'
Transformer.__init__(self, 25) for rexp, confidence, span_adjust in video_rexps:
match = re.search(sep + rexp + sep, string, re.IGNORECASE)
if match:
metadata = match.groupdict()
# is this the better place to put it? (maybe, as it is at least
# the soonest that we can catch it)
if metadata.get('cdNumberTotal', -1) is None:
del metadata['cdNumberTotal']
span = (match.start() + span_adjust[0],
match.end() + span_adjust[1] - 2)
return (Guess(metadata, confidence=confidence, raw=string[span[0]:span[1]]),
span)
self.container = PropertiesContainer(canonical_from_pattern=False) return None, None
self.container.register_property(None, 'cd' + _psep + '(?P<cdNumber>[0-9])(?:' + _psep + 'of' + _psep + '(?P<cdNumberTotal>[0-9]))?', confidence=1.0, enhance=False, global_span=True, formatter=parse_numeral)
self.container.register_property('cdNumberTotal', '([1-9])' + _psep + 'cds?', confidence=0.9, enhance=False, formatter=parse_numeral)
self.container.register_property('bonusNumber', 'x([0-9]{1,2})', enhance=False, global_span=True, formatter=parse_numeral) def process(mtree):
SingleNodeGuesser(guess_video_rexps, None, log).process(mtree)
self.container.register_property('filmNumber', 'f([0-9]{1,2})', enhance=False, global_span=True, formatter=parse_numeral)
self.container.register_property('edition', 'collector', 'collector-edition', 'edition-collector', canonical_form='Collector Edition')
self.container.register_property('edition', 'special-edition', 'edition-special', canonical_form='Special Edition')
self.container.register_property('edition', 'criterion', 'criterion-edition', 'edition-criterion', canonical_form='Criterion Edition')
self.container.register_property('edition', 'deluxe', 'cdeluxe-edition', 'edition-deluxe', canonical_form='Deluxe Edition')
self.container.register_property('edition', 'director\'?s?-cut', 'director\'?s?-cut-edition', 'edition-director\'?s?-cut', canonical_form='Director\'s cut')
def supported_properties(self):
return self.container.get_supported_properties()
def guess_video_rexps(self, string, node=None, options=None):
found = self.container.find_properties(string, node)
return self.container.as_guess(found, string)
def process(self, mtree, options=None):
GuessFinder(self.guess_video_rexps, None, self.log, options).process_nodes(mtree.unidentified_leaves())

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GuessIt - A library for guessing information from filenames # GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com> # Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# #
# GuessIt is free software; you can redistribute it and/or modify it under # GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by # the terms of the Lesser GNU General Public License as published by
@ -18,52 +18,45 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import unicode_literals
from guessit import Guess
from guessit.transfo import SingleNodeGuesser
from guessit.patterns import weak_episode_rexps
import re
import logging
from guessit.plugins.transformers import Transformer log = logging.getLogger(__name__)
from guessit.matcher import GuessFinder
from guessit.patterns import sep
from guessit.containers import PropertiesContainer
from guessit.patterns.numeral import numeral, parse_numeral
from guessit.date import valid_year
class GuessWeakEpisodesRexps(Transformer): def guess_weak_episodes_rexps(string, node):
def __init__(self): if 'episodeNumber' in node.root.info:
Transformer.__init__(self, 15) return None, None
self.properties = PropertiesContainer(enhance=False, canonical_from_pattern=False) for rexp, span_adjust in weak_episode_rexps:
match = re.search(rexp, string, re.IGNORECASE)
if match:
metadata = match.groupdict()
span = (match.start() + span_adjust[0],
match.end() + span_adjust[1])
def _formater(episodeNumber): epnum = int(metadata['episodeNumber'])
epnum = parse_numeral(episodeNumber) if epnum > 100:
if not valid_year(epnum): season, epnum = epnum // 100, epnum % 100
if epnum > 100: # episodes which have a season > 25 are most likely errors
season, epnum = epnum // 100, epnum % 100 # (Simpsons is at 23!)
# episodes which have a season > 50 are most likely errors if season > 25:
# (Simpson is at 25!) continue
if season > 50: return Guess({ 'season': season,
return None 'episodeNumber': epnum },
return {'season': season, 'episodeNumber': epnum} confidence=0.6, raw=string[span[0]:span[1]]), span
else: else:
return epnum return Guess(metadata, confidence=0.3, raw=string[span[0]:span[1]]), span
self.properties.register_property(['episodeNumber', 'season'], '[0-9]{2,4}', confidence=0.6, formatter=_formater) return None, None
self.properties.register_property('episodeNumber', '(?:episode)' + sep + '(' + numeral + ')[^0-9]', confidence=0.3)
def supported_properties(self):
return self.properties.get_supported_properties()
def guess_weak_episodes_rexps(self, string, node=None, options=None): guess_weak_episodes_rexps.use_node = True
if node and 'episodeNumber' in node.root.info:
return None
properties = self.properties.find_properties(string, node)
guess = self.properties.as_guess(properties, string)
return guess def process(mtree):
SingleNodeGuesser(guess_weak_episodes_rexps, 0.6, log).process(mtree)
def should_process(self, mtree, options=None):
return mtree.guess.get('type', '').startswith('episode')
def process(self, mtree, options=None):
GuessFinder(self.guess_weak_episodes_rexps, 0.6, self.log, options).process_nodes(mtree.unidentified_leaves())

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GuessIt - A library for guessing information from filenames # GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com> # Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# #
# GuessIt is free software; you can redistribute it and/or modify it under # GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by # the terms of the Lesser GNU General Public License as published by
@ -18,49 +18,22 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, \ from __future__ import unicode_literals
unicode_literals from guessit.transfo import SingleNodeGuesser
from guessit.patterns import websites
import logging
from guessit.patterns import build_or_pattern log = logging.getLogger(__name__)
from guessit.containers import PropertiesContainer
from guessit.plugins.transformers import Transformer
from guessit.matcher import GuessFinder
from pkg_resources import resource_stream # @UnresolvedImport
class GuessWebsite(Transformer): def guess_website(string):
def __init__(self): low = string.lower()
Transformer.__init__(self, 45) for site in websites:
pos = low.find(site.lower())
if pos != -1:
return {'website': site}, (pos, pos + len(site))
return None, None
self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)
tlds = [] def process(mtree):
SingleNodeGuesser(guess_website, 1.0, log).process(mtree)
f = resource_stream('guessit', 'tlds-alpha-by-domain.txt')
f.readline()
next(f)
for tld in f:
tld = tld.strip()
if b'--' in tld:
continue
tlds.append(tld.decode("utf-8"))
f.close()
tlds_pattern = build_or_pattern(tlds) # All registered domain extension
safe_tlds_pattern = build_or_pattern(['com', 'org', 'net']) # For sure a website extension
safe_subdomains_pattern = build_or_pattern(['www']) # For sure a website subdomain
safe_prefix_tlds_pattern = build_or_pattern(['co', 'com', 'org', 'net']) # Those words before a tlds are sure
self.container.register_property('website', '(?:' + safe_subdomains_pattern + '\.)+' + r'(?:[a-z-]+\.)+' + r'(?:' + tlds_pattern + r')+')
self.container.register_property('website', '(?:' + safe_subdomains_pattern + '\.)*' + r'[a-z-]+\.' + r'(?:' + safe_tlds_pattern + r')+')
self.container.register_property('website', '(?:' + safe_subdomains_pattern + '\.)*' + r'[a-z-]+\.' + r'(?:' + safe_prefix_tlds_pattern + r'\.)+' + r'(?:' + tlds_pattern + r')+')
def supported_properties(self):
return self.container.get_supported_properties()
def guess_website(self, string, node=None, options=None):
found = self.container.find_properties(string, node, 'website')
return self.container.as_guess(found, string)
def process(self, mtree, options=None):
GuessFinder(self.guess_website, 1.0, self.log, options).process_nodes(mtree.unidentified_leaves())

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GuessIt - A library for guessing information from filenames # GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com> # Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# #
# GuessIt is free software; you can redistribute it and/or modify it under # GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by # the terms of the Lesser GNU General Public License as published by
@ -18,32 +18,33 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import unicode_literals
from guessit.transfo import SingleNodeGuesser
from guessit.plugins.transformers import Transformer
from guessit.matcher import GuessFinder
from guessit.date import search_year from guessit.date import search_year
import logging
log = logging.getLogger(__name__)
class GuessYear(Transformer): def guess_year(string):
def __init__(self): year, span = search_year(string)
Transformer.__init__(self, -160) if year:
return { 'year': year }, span
else:
return None, None
def supported_properties(self): def guess_year_skip_first(string):
return ['year'] year, span = search_year(string)
if year:
year2, span2 = guess_year(string[span[1]:])
if year2:
return year2, (span2[0]+span[1], span2[1]+span[1])
def guess_year(self, string, node=None, options=None): return None, None
year, span = search_year(string)
if year:
return {'year': year}, span
else:
return None, None
def second_pass_options(self, mtree, options=None):
    """Build the options for a second pass when several years were found.

    Looks up the leaves of `mtree` containing a 'year'. If there is more
    than one, returns a dict whose 'skip_nodes' entry lists all of them
    except the last; otherwise returns None. `options` is not used here.
    """
    year_leaves = mtree.leaves_containing('year')
    if len(year_leaves) <= 1:
        return None
    # every year leaf but the last one goes into 'skip_nodes'
    return {'skip_nodes': year_leaves[:-1]}
def process(self, mtree, options=None): def process(mtree, skip_first_year=False):
GuessFinder(self.guess_year, 1.0, self.log, options).process_nodes(mtree.unidentified_leaves()) if skip_first_year:
SingleNodeGuesser(guess_year_skip_first, 1.0, log).process(mtree)
else:
SingleNodeGuesser(guess_year, 1.0, log).process(mtree)

View file

@ -0,0 +1,73 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from guessit.patterns import subtitle_exts
from guessit.textutils import reorder_title, find_words
import logging
log = logging.getLogger(__name__)
def _promote_language_to_subtitle(node):
    """Move the node's 'language' guess to 'subtitleLanguage', at most once.

    The confidence attached to 'language' is carried over. If the node no
    longer has a 'language' entry (it was already promoted by an earlier
    heuristic), this is a no-op instead of failing on the missing key.
    """
    if 'language' not in node.guess:
        return
    node.guess.set('subtitleLanguage', node.guess['language'],
                   confidence=node.guess.confidence('language'))
    del node.guess['language']


def process(mtree):
    """Post-process the match tree in place.

    1- promotes 'language' guesses to 'subtitleLanguage' where one of the
       heuristics below applies;
    2- passes every 'series' guess through reorder_title().

    The heuristics of step 1 are checked independently, so several of them
    can match the same node; the promotion itself is idempotent.
    """
    # 1- try to promote language to subtitle language where it makes sense
    for node in mtree.nodes():
        if 'language' not in node.guess:
            continue

        # - if we matched a language in a file with a sub extension and that
        #   the group is the last group of the filename, it is probably the
        #   language of the subtitle
        #   (eg: 'xxx.english.srt')
        if (mtree.node_at((-1,)).value.lower() in subtitle_exts and
                node == mtree.leaves()[-2]):
            _promote_language_to_subtitle(node)

        # - if we find the word 'sub' before the language, and in the same
        #   explicit group, then upgrade the language
        explicit_group = mtree.node_at(node.node_idx[:2])
        group_str = explicit_group.value.lower()

        if ('sub' in find_words(group_str) and
                0 <= group_str.find('sub') < (node.span[0] - explicit_group.span[0])):
            _promote_language_to_subtitle(node)

        # - if a language is in an explicit group just preceded by "st",
        #   it is a subtitle language (eg: '...st[fr-eng]...')
        try:
            idx = node.node_idx
            previous = mtree.node_at((idx[0], idx[1] - 1)).leaves()[-1]
            if previous.value.lower()[-2:] == 'st':
                _promote_language_to_subtitle(node)
        except IndexError:
            # no usable preceding group/leaf: nothing to check
            pass

    # 2- ", the" at the end of a series title should be prepended to it
    for node in mtree.nodes():
        if 'series' not in node.guess:
            continue

        node.guess['series'] = reorder_title(node.guess['series'])

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GuessIt - A library for guessing information from filenames # GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com> # Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# #
# GuessIt is free software; you can redistribute it and/or modify it under # GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by # the terms of the Lesser GNU General Public License as published by
@ -18,32 +18,27 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import unicode_literals
from guessit.plugins.transformers import Transformer
from guessit.textutils import find_first_level_groups from guessit.textutils import find_first_level_groups
from guessit.patterns import group_delimiters from guessit.patterns import group_delimiters
from functools import reduce import functools
import logging
log = logging.getLogger(__name__)
class SplitExplicitGroups(Transformer): def process(mtree):
def __init__(self): """return the string split into explicit groups, that is, those either
Transformer.__init__(self, 245) between parenthese, square brackets or curly braces, and those separated
by a dash."""
for c in mtree.children:
groups = find_first_level_groups(c.value, group_delimiters[0])
for delimiters in group_delimiters:
flatten = lambda l, x: l + find_first_level_groups(x, delimiters)
groups = functools.reduce(flatten, groups, [])
def process(self, mtree, options=None): # do not do this at this moment, it is not strong enough and can break other
"""split each of those into explicit groups (separated by parentheses or square brackets) # patterns, such as dates, etc...
#groups = functools.reduce(lambda l, x: l + x.split('-'), groups, [])
:return: return the string split into explicit groups, that is, those either c.split_on_components(groups)
between parenthese, square brackets or curly braces, and those separated
by a dash."""
for c in mtree.children:
groups = find_first_level_groups(c.value, group_delimiters[0])
for delimiters in group_delimiters:
flatten = lambda l, x: l + find_first_level_groups(x, delimiters)
groups = reduce(flatten, groups, [])
# do not do this at this moment, it is not strong enough and can break other
# patterns, such as dates, etc...
# groups = functools.reduce(lambda l, x: l + x.split('-'), groups, [])
c.split_on_components(groups)

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GuessIt - A library for guessing information from filenames # GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com> # Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# #
# GuessIt is free software; you can redistribute it and/or modify it under # GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by # the terms of the Lesser GNU General Public License as published by
@ -18,30 +18,25 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import unicode_literals
from guessit.plugins.transformers import Transformer
from guessit.patterns import sep from guessit.patterns import sep
import re import re
import logging
log = logging.getLogger(__name__)
class SplitOnDash(Transformer): def process(mtree):
def __init__(self): for node in mtree.unidentified_leaves():
Transformer.__init__(self, 190) indices = []
def process(self, mtree, options=None): didx = 0
"""split into '-' separated subgroups (with required separator chars pattern = re.compile(sep + '-' + sep)
around the dash) match = pattern.search(node.value)
""" while match:
for node in mtree.unidentified_leaves(): span = match.span()
indices = [] indices.extend([ span[0], span[1] ])
match = pattern.search(node.value, span[1])
pattern = re.compile(sep + '-' + sep) if indices:
match = pattern.search(node.value) node.partition(indices)
while match:
span = match.span()
indices.extend([span[0], span[1]])
match = pattern.search(node.value, span[1])
if indices:
node.partition(indices)

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GuessIt - A library for guessing information from filenames # GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com> # Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# #
# GuessIt is free software; you can redistribute it and/or modify it under # GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by # the terms of the Lesser GNU General Public License as published by
@ -18,28 +18,19 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import unicode_literals
from guessit.plugins.transformers import Transformer
from guessit import fileutils from guessit import fileutils
from os.path import splitext import os.path
import logging
log = logging.getLogger(__name__)
class SplitPathComponents(Transformer): def process(mtree):
def __init__(self): """Returns the filename split into [ dir*, basename, ext ]."""
Transformer.__init__(self, 255) components = fileutils.split_path(mtree.value)
basename = components.pop(-1)
components += list(os.path.splitext(basename))
components[-1] = components[-1][1:] # remove the '.' from the extension
def process(self, mtree, options=None): mtree.split_on_components(components)
"""first split our path into dirs + basename + ext
:return: the filename split into [ dir*, basename, ext ]
"""
if not options.get('name_only'):
components = fileutils.split_path(mtree.value)
basename = components.pop(-1)
components += list(splitext(basename))
components[-1] = components[-1][1:] # remove the '.' from the extension
mtree.split_on_components(components)
else:
mtree.split_on_components([mtree.value, ''])