Switched out the guessit libs for the one CP uses; it seems to have fewer dependencies

2025-07-16 02:02:53 -07:00 · 2014-04-21 23:07:04 -07:00 · 2014-04-21 23:07:04 -07:00 · 6fea9ddb40
commit 6fea9ddb40
parent a6cd0f156b
65 changed files with 2034 additions and 7313 deletions
--- a/lib/guessit/init.py
+++ b/lib/guessit/init.py
@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@ -18,11 +18,9 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, unicode_literals
+from __future__ import unicode_literals
 import pkg_resources
 from .__version__ import __version__
 __version__ = '0.6.2'
 __all__ = ['Guess', 'Language',
           'guess_file_info', 'guess_video_info',
           'guess_movie_info', 'guess_episode_info']
@ -32,69 +30,58 @@ __all__ = ['Guess', 'Language',
 # it will then always be available
 # with code from http://lucumr.pocoo.org/2011/1/22/forwards-compatible-python/
 import sys
-if sys.version_info[0] >= 3:  # pragma: no cover
+if sys.version_info[0] >= 3:
-    PY2, PY3 = False, True
+    PY3 = True
    unicode_text_type = str
    native_text_type = str
    base_text_type = str
    def u(x):
        return str(x)
    def s(x):
        return x
    class UnicodeMixin(object):
        __str__ = lambda x: x.__unicode__()
    import binascii
    def to_hex(x):
        return binascii.hexlify(x).decode('utf-8')
-else:   # pragma: no cover
+else:
-    PY2, PY3 = True, False
+    PY3 = False
-    __all__ = [str(s) for s in __all__]  # fix imports for python2
+    __all__ = [ str(s) for s in __all__ ] # fix imports for python2
    unicode_text_type = unicode
    native_text_type = str
    base_text_type = basestring
    def u(x):
        if isinstance(x, str):
            return x.decode('utf-8')
        if isinstance(x, list):
            return [u(s) for s in x]
        return unicode(x)
    def s(x):
        if isinstance(x, unicode):
            return x.encode('utf-8')
        if isinstance(x, list):
-            return [s(y) for y in x]
+            return [ s(y) for y in x ]
        if isinstance(x, tuple):
            return tuple(s(y) for y in x)
        if isinstance(x, dict):
            return dict((s(key), s(value)) for key, value in x.items())
        return x
    class UnicodeMixin(object):
        __str__ = lambda x: unicode(x).encode('utf-8')
    def to_hex(x):
        return x.encode('hex')
    range = xrange
 from guessit.guess import Guess, merge_all
 from guessit.language import Language
 from guessit.matcher import IterativeMatcher
-from guessit.textutils import clean_string, is_camel, from_camel
+from guessit.textutils import clean_string
 import os.path
 import logging
 import json
 log = logging.getLogger(__name__)
 class NullHandler(logging.Handler):
    """Logging handler that silently discards every record it receives.

    An instance of this class is attached to the module-level ``log``
    logger immediately after the class definition.
    """

    def emit(self, record):
        """Drop ``record`` without writing it anywhere."""
        return None
@ -104,74 +91,137 @@ h = NullHandler()
 log.addHandler(h)
-def _guess_filename(filename, options=None, **kwargs):
+def _guess_filename(filename, filetype):
-    mtree = _build_filename_mtree(filename, options=options, **kwargs)
+    def find_nodes(tree, props):
-    _add_camel_properties(mtree, options=options)
+        """Yields all nodes containing any of the given props."""
-    return mtree.matched()
+        if isinstance(props, base_text_type):
            props = [props]
        for node in tree.nodes():
            if any(prop in node.guess for prop in props):
                yield node
    def warning(title):
        log.warning('%s, guesses: %s - %s' % (title, m.nice_string(), m2.nice_string()))
        return m
    mtree = IterativeMatcher(filename, filetype=filetype)
    m = mtree.matched()
    second_pass_opts = []
    second_pass_transfo_opts = {}
    # if there are multiple possible years found, we assume the first one is
    # part of the title, reparse the tree taking this into account
    years = set(n.value for n in find_nodes(mtree.match_tree, 'year'))
    if len(years) >= 2:
        second_pass_opts.append('skip_first_year')
    to_skip_language_nodes = []
    title_nodes = set(n for n in find_nodes(mtree.match_tree, ['title', 'series']))
    title_spans = {}
    for title_node in title_nodes:
        title_spans[title_node.span[0]] = title_node
        title_spans[title_node.span[1]] = title_node
    for lang_key in ('language', 'subtitleLanguage'):
        langs = {}
        lang_nodes = set(n for n in find_nodes(mtree.match_tree, lang_key))
        for lang_node in lang_nodes:
            lang = lang_node.guess.get(lang_key, None)
            if len(lang_node.value) > 3 and (lang_node.span[0] in title_spans.keys() or lang_node.span[1] in title_spans.keys()):
                # Language is next or before title, and is not a language code. Add to skip for 2nd pass.
                # if filetype is subtitle and the language appears last, just before
                # the extension, then it is likely a subtitle language
                parts = clean_string(lang_node.root.value).split()
                if m['type'] in ['moviesubtitle', 'episodesubtitle'] and (parts.index(lang_node.value) == len(parts) - 2):
                    continue
                to_skip_language_nodes.append(lang_node)
            elif not lang in langs:
                langs[lang] = lang_node
            else:
                # The same language was found. Keep the more confident one, and add others to skip for 2nd pass.
                existing_lang_node = langs[lang]
                to_skip = None
                if existing_lang_node.guess.confidence('language') >= lang_node.guess.confidence('language'):
                    # lang_node is to remove
                    to_skip = lang_node
                else:
                    # existing_lang_node is to remove
                    langs[lang] = lang_node
                    to_skip = existing_lang_node
                to_skip_language_nodes.append(to_skip)
-def _build_filename_mtree(filename, options=None, **kwargs):
+    if to_skip_language_nodes:
-    mtree = IterativeMatcher(filename, options=options, **kwargs)
+        second_pass_transfo_opts['guess_language'] = (
-    second_pass_options = mtree.second_pass_options
+            ((), { 'skip': [ { 'node_idx': node.parent.node_idx,
-    if second_pass_options:
+                               'span': node.span }
-        log.info("Running 2nd pass")
+                             for node in to_skip_language_nodes ] }))
-        merged_options = dict(options)
+
-        merged_options.update(second_pass_options)
+    if second_pass_opts or second_pass_transfo_opts:
-        mtree = IterativeMatcher(filename, options=merged_options, **kwargs)
+        # 2nd pass is needed
-    return mtree
+        log.info("Running 2nd pass with options: %s" % second_pass_opts)
        log.info("Transfo options: %s" % second_pass_transfo_opts)
        mtree = IterativeMatcher(filename, filetype=filetype,
                                 opts=second_pass_opts,
                                 transfo_opts=second_pass_transfo_opts)
    m = mtree.matched()
    if 'language' not in m and 'subtitleLanguage' not in m or 'title' not in m:
        return m
    # if we found some language, make sure we didn't cut a title or sth...
    mtree2 = IterativeMatcher(filename, filetype=filetype,
                              opts=['nolanguage', 'nocountry'])
    m2 = mtree2.matched()
    if m.get('title') != m2.get('title'):
        title = next(find_nodes(mtree.match_tree, 'title'))
        title2 = next(find_nodes(mtree2.match_tree, 'title'))
        # if a node is in an explicit group, then the correct title is probably
        # the other one
        if title.root.node_at(title.node_idx[:2]).is_explicit():
            return m2
        elif title2.root.node_at(title2.node_idx[:2]).is_explicit():
            return m
    return m
-def _add_camel_properties(mtree, options=None, **kwargs):
+def guess_file_info(filename, filetype, info=None):
    prop = 'title' if mtree.matched().get('type') != 'episode' else 'series'
    value = mtree.matched().get(prop)
    _guess_camel_string(mtree, value, options=options, skip_title=False, **kwargs)
    for leaf in mtree.match_tree.unidentified_leaves():
        value = leaf.value
        _guess_camel_string(mtree, value, options=options, skip_title=True, **kwargs)
 def _guess_camel_string(mtree, string, options=None, skip_title=False, **kwargs):
    """Re-run the matcher on a camel-cased string and merge what it finds.

    :param mtree: match tree whose ``matched()`` guess is updated in place
    :param string: candidate text; empty/None or non camel-cased text is ignored
    :param options: forwarded unchanged to ``_build_filename_mtree``
    :param skip_title: forwarded unchanged to ``_build_filename_mtree``
    :param kwargs: extra keyword arguments forwarded to ``_build_filename_mtree``
    :return: True if the un-camel-cased text produced at least one property
             (which has then been merged into ``mtree.matched()``), else False
    """
    # Guard: nothing to do for empty input or text that is not camel cased.
    if not string or not is_camel(string):
        return False

    log.info('"%s" is camel cased. Try to detect more properties.' % (string,))
    spaced_text = from_camel(string)
    sub_tree = _build_filename_mtree(spaced_text, options=options, name_only=True,
                                     skip_title=skip_title, **kwargs)

    if len(sub_tree.matched()) == 0:
        return False

    # The second parse found something: fold it into the main guess.
    mtree.matched().update(sub_tree.matched())
    return True
 def guess_file_info(filename, info=None, options=None, **kwargs):
    """info can contain the names of the various plugins, such as 'filename' to
    detect filename info, or 'hash_md5' to get the md5 hash of the file.
-    >>> testfile = os.path.join(os.path.dirname(__file__), 'test/dummy.srt')
+    >>> guess_file_info('tests/dummy.srt', 'autodetect', info = ['hash_md5', 'hash_sha1'])
-    >>> g = guess_file_info(testfile, info = ['hash_md5', 'hash_sha1'])
+    {'hash_md5': 'e781de9b94ba2753a8e2945b2c0a123d', 'hash_sha1': 'bfd18e2f4e5d59775c2bc14d80f56971891ed620'}
    >>> g['hash_md5'], g['hash_sha1']
    ('64de6b5893cac24456c46a935ef9c359', 'a703fc0fa4518080505809bf562c6fc6f7b3c98c')
    """
    info = info or 'filename'
    options = options or {}
    result = []
    hashers = []
    # Force unicode as soon as possible
    filename = u(filename)
    if info is None:
        info = ['filename']
    if isinstance(info, base_text_type):
        info = [info]
    for infotype in info:
        if infotype == 'filename':
-            result.append(_guess_filename(filename, options, **kwargs))
+            result.append(_guess_filename(filename, filetype))
        elif infotype == 'hash_mpc':
            from guessit.hash_mpc import hash_file
            try:
-                result.append(Guess({infotype: hash_file(filename)},
+                result.append(Guess({'hash_mpc': hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute MPC-style hash because: %s' % e)
@ -179,7 +229,7 @@ def guess_file_info(filename, info=None, options=None, **kwargs):
        elif infotype == 'hash_ed2k':
            from guessit.hash_ed2k import hash_file
            try:
-                result.append(Guess({infotype: hash_file(filename)},
+                result.append(Guess({'hash_ed2k': hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute ed2k hash because: %s' % e)
@ -217,16 +267,23 @@ def guess_file_info(filename, info=None, options=None, **kwargs):
    result = merge_all(result)
    # last minute adjustments
    # if country is in the guessed properties, make it part of the filename
    if 'series' in result and 'country' in result:
        result['series'] += ' (%s)' % result['country'].alpha2.upper()
    return result
-def guess_video_info(filename, info=None, options=None, **kwargs):
+def guess_video_info(filename, info=None):
-    return guess_file_info(filename, info=info, options=options, type='video', **kwargs)
+    return guess_file_info(filename, 'autodetect', info)
-def guess_movie_info(filename, info=None, options=None, **kwargs):
+def guess_movie_info(filename, info=None):
-    return guess_file_info(filename, info=info, options=options, type='movie', **kwargs)
+    return guess_file_info(filename, 'movie', info)
-def guess_episode_info(filename, info=None, options=None, **kwargs):
+def guess_episode_info(filename, info=None):
-    return guess_file_info(filename, info=info, options=options, type='episode', **kwargs)
+    return guess_file_info(filename, 'episode', info)
--- a/lib/guessit/main.py
+++ b/lib/guessit/main.py
@ -2,8 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
 # Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@ -19,199 +18,109 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, unicode_literals
+from __future__ import unicode_literals
 from __future__ import print_function
 from guessit import u
 from guessit import slogging, guess_file_info
 from optparse import OptionParser
 import logging
 import sys
 import os
-
+import locale
 from guessit import PY2, u, guess_file_info
 from guessit.options import option_parser
-def guess_file(filename, info='filename', options=None, **kwargs):
+def detect_filename(filename, filetype, info=['filename'], advanced = False):
    options = options or {}
    filename = u(filename)
    print('For:', filename)
-    guess = guess_file_info(filename, info, options, **kwargs)
+    print('GuessIt found:', guess_file_info(filename, filetype, info).nice_string(advanced))
    if options.get('yaml'):
        try:
            import yaml
            for k, v in guess.items():
                if isinstance(v, list) and len(v) == 1:
                    guess[k] = v[0]
            ystr = yaml.safe_dump({filename: dict(guess)}, default_flow_style=False)
            i = 0
            for yline in ystr.splitlines():
                if i == 0:
                    print("? " + yline[:-1])
                elif i == 1:
                    print(":" + yline[1:])
                else:
                    print(yline)
                i = i + 1
            return
        except ImportError:  # pragma: no cover
            print('PyYAML not found. Using default output.')
    print('GuessIt found:', guess.nice_string(options.get('advanced')))
-def _supported_properties():
+def run_demo(episodes=True, movies=True, advanced=False):
    from guessit.plugins import transformers
    all_properties = {}
    transformers_properties = []
    for transformer in transformers.all_transformers():
        supported_properties = transformer.supported_properties()
        transformers_properties.append((transformer, supported_properties))
        if isinstance(supported_properties, dict):
            for property_name, possible_values in supported_properties.items():
                current_possible_values = all_properties.get(property_name)
                if current_possible_values is None:
                    current_possible_values = []
                    all_properties[property_name] = current_possible_values
                if possible_values:
                    current_possible_values.extend(possible_values)
        else:
            for property_name in supported_properties:
                current_possible_values = all_properties.get(property_name)
                if current_possible_values is None:
                    current_possible_values = []
                    all_properties[property_name] = current_possible_values
    return (all_properties, transformers_properties)
 def display_transformers():
    """Print one line per transformer, showing its name and priority.

    The transformers are taken from the second element of the pair
    returned by ``_supported_properties()``.
    """
    print('GuessIt transformers:')
    per_transformer = _supported_properties()[1]
    for transformer, _unused_props in per_transformer:
        line = '[@] %s (%s)' % (transformer.name, transformer.priority)
        print(line)
 def display_properties(values, transformers):
    """Print the property names reported by ``_supported_properties()``.

    :param values: when truthy, also print the possible values of every
                   property that has a non-empty value list
    :param transformers: when truthy, group the properties under the
                         transformer that declares them; otherwise print a
                         single list of all property names in sorted order
    """
    print('GuessIt properties:')
    known, per_transformer = _supported_properties()

    def show(name):
        # One bullet per property; the values are listed underneath only
        # when they were requested and there is something to list.
        possible = known.get(name)
        print('  [+] %s' % (name,))
        if possible and values:
            _display_property_values(name, indent=4)

    if not transformers:
        # Flat view: every known property, alphabetically.
        for name in sorted(known.keys()):
            show(name)
        return

    # Grouped view: a header per transformer followed by its properties.
    for transformer, names in per_transformer:
        print('[@] %s (%s)' % (transformer.name, transformer.priority))
        for name in names:
            show(name)
 def _display_property_values(property_name, indent=2):
    """Print each possible value of ``property_name`` on its own line.

    :param property_name: key looked up in the first element returned by
                          ``_supported_properties()``
    :param indent: number of spaces printed before each ``[!]`` bullet
    """
    known = _supported_properties()[0]
    possible = known.get(property_name)
    for item in possible:
        print('%s[!] %s' % (indent * ' ', item))
 def run_demo(episodes=True, movies=True, options=None):
    # NOTE: tests should not be added here but rather in the tests/ folder
    #       this is just intended as a quick example
    if episodes:
-        testeps = ['Series/Californication/Season 2/Californication.2x05.Vaginatown.HDTV.XviD-0TV.[tvu.org.ru].avi',
+        testeps = [ 'Series/Californication/Season 2/Californication.2x05.Vaginatown.HDTV.XviD-0TV.[tvu.org.ru].avi',
-                   'Series/dexter/Dexter.5x02.Hello,.Bandit.ENG.-.sub.FR.HDTV.XviD-AlFleNi-TeaM.[tvu.org.ru].avi',
+                    'Series/dexter/Dexter.5x02.Hello,.Bandit.ENG.-.sub.FR.HDTV.XviD-AlFleNi-TeaM.[tvu.org.ru].avi',
-                   'Series/Treme/Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.[tvu.org.ru].avi',
+                    'Series/Treme/Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.[tvu.org.ru].avi',
-                   'Series/Duckman/Duckman - 101 (01) - 20021107 - I, Duckman.avi',
+                    'Series/Duckman/Duckman - 101 (01) - 20021107 - I, Duckman.avi',
-                   'Series/Duckman/Duckman - S1E13 Joking The Chicken (unedited).avi',
+                    'Series/Duckman/Duckman - S1E13 Joking The Chicken (unedited).avi',
-                   'Series/Simpsons/The_simpsons_s13e18_-_i_am_furious_yellow.mpg',
+                    'Series/Simpsons/The_simpsons_s13e18_-_i_am_furious_yellow.mpg',
-                   'Series/Simpsons/Saison 12 Français/Simpsons,.The.12x08.A.Bas.Le.Sergent.Skinner.FR.[tvu.org.ru].avi',
+                    'Series/Simpsons/Saison 12 Français/Simpsons,.The.12x08.A.Bas.Le.Sergent.Skinner.FR.[tvu.org.ru].avi',
-                   'Series/Dr._Slump_-_002_DVB-Rip_Catalan_by_kelf.avi',
+                    'Series/Dr._Slump_-_002_DVB-Rip_Catalan_by_kelf.avi',
-                   'Series/Kaamelott/Kaamelott - Livre V - Second Volet - HD 704x396 Xvid 2 pass - Son 5.1 - TntRip by Slurm.avi'
+                    'Series/Kaamelott/Kaamelott - Livre V - Second Volet - HD 704x396 Xvid 2 pass - Son 5.1 - TntRip by Slurm.avi'
-                   ]
+                    ]
        for f in testeps:
-            print('-' * 80)
+            print('-'*80)
-            guess_file(f, options=options, type='episode')
+            detect_filename(f, filetype='episode', advanced=advanced)
    if movies:
-        testmovies = ['Movies/Fear and Loathing in Las Vegas (1998)/Fear.and.Loathing.in.Las.Vegas.720p.HDDVD.DTS.x264-ESiR.mkv',
+        testmovies = [ 'Movies/Fear and Loathing in Las Vegas (1998)/Fear.and.Loathing.in.Las.Vegas.720p.HDDVD.DTS.x264-ESiR.mkv',
-                      'Movies/El Dia de la Bestia (1995)/El.dia.de.la.bestia.DVDrip.Spanish.DivX.by.Artik[SEDG].avi',
+                       'Movies/El Dia de la Bestia (1995)/El.dia.de.la.bestia.DVDrip.Spanish.DivX.by.Artik[SEDG].avi',
-                      'Movies/Blade Runner (1982)/Blade.Runner.(1982).(Director\'s.Cut).CD1.DVDRip.XviD.AC3-WAF.avi',
+                       'Movies/Blade Runner (1982)/Blade.Runner.(1982).(Director\'s.Cut).CD1.DVDRip.XviD.AC3-WAF.avi',
-                      'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv',
+                       'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv',
-                      'Movies/Sin City (BluRay) (2005)/Sin.City.2005.BDRip.720p.x264.AC3-SEPTiC.mkv',
+                       'Movies/Sin City (BluRay) (2005)/Sin.City.2005.BDRip.720p.x264.AC3-SEPTiC.mkv',
-                      'Movies/Borat (2006)/Borat.(2006).R5.PROPER.REPACK.DVDRip.XviD-PUKKA.avi',  # FIXME: PROPER and R5 get overwritten
+                       'Movies/Borat (2006)/Borat.(2006).R5.PROPER.REPACK.DVDRip.XviD-PUKKA.avi', # FIXME: PROPER and R5 get overwritten
-                      '[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv',  # FIXME: title gets overwritten
+                       '[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv', # FIXME: title gets overwritten
-                      'Battle Royale (2000)/Battle.Royale.(Batoru.Rowaiaru).(2000).(Special.Edition).CD1of2.DVDRiP.XviD-[ZeaL].avi',
+                       'Battle Royale (2000)/Battle.Royale.(Batoru.Rowaiaru).(2000).(Special.Edition).CD1of2.DVDRiP.XviD-[ZeaL].avi',
-                      'Movies/Brazil (1985)/Brazil_Criterion_Edition_(1985).CD2.English.srt',
+                       'Movies/Brazil (1985)/Brazil_Criterion_Edition_(1985).CD2.English.srt',
-                      'Movies/Persepolis (2007)/[XCT] Persepolis [H264+Aac-128(Fr-Eng)+ST(Fr-Eng)+Ind].mkv',
+                       'Movies/Persepolis (2007)/[XCT] Persepolis [H264+Aac-128(Fr-Eng)+ST(Fr-Eng)+Ind].mkv',
-                      'Movies/Toy Story (1995)/Toy Story [HDTV 720p English-Spanish].mkv',
+                       'Movies/Toy Story (1995)/Toy Story [HDTV 720p English-Spanish].mkv',
-                      'Movies/Pirates of the Caribbean: The Curse of the Black Pearl (2003)/Pirates.Of.The.Carribean.DC.2003.iNT.DVDRip.XviD.AC3-NDRT.CD1.avi',
+                       'Movies/Pirates of the Caribbean: The Curse of the Black Pearl (2003)/Pirates.Of.The.Carribean.DC.2003.iNT.DVDRip.XviD.AC3-NDRT.CD1.avi',
-                      'Movies/Office Space (1999)/Office.Space.[Dual-DVDRip].[Spanish-English].[XviD-AC3-AC3].[by.Oswald].avi',
+                       'Movies/Office Space (1999)/Office.Space.[Dual-DVDRip].[Spanish-English].[XviD-AC3-AC3].[by.Oswald].avi',
-                      'Movies/The NeverEnding Story (1984)/The.NeverEnding.Story.1.1984.DVDRip.AC3.Xvid-Monteque.avi',
+                       'Movies/The NeverEnding Story (1984)/The.NeverEnding.Story.1.1984.DVDRip.AC3.Xvid-Monteque.avi',
-                      'Movies/Juno (2007)/Juno KLAXXON.avi',
+                       'Movies/Juno (2007)/Juno KLAXXON.avi',
-                      'Movies/Chat noir, chat blanc (1998)/Chat noir, Chat blanc - Emir Kusturica (VO - VF - sub FR - Chapters).mkv',
+                       'Movies/Chat noir, chat blanc (1998)/Chat noir, Chat blanc - Emir Kusturica (VO - VF - sub FR - Chapters).mkv',
-                      'Movies/Wild Zero (2000)/Wild.Zero.DVDivX-EPiC.srt',
+                       'Movies/Wild Zero (2000)/Wild.Zero.DVDivX-EPiC.srt',
-                      'Movies/El Bosque Animado (1987)/El.Bosque.Animado.[Jose.Luis.Cuerda.1987].[Xvid-Dvdrip-720x432].avi',
+                       'Movies/El Bosque Animado (1987)/El.Bosque.Animado.[Jose.Luis.Cuerda.1987].[Xvid-Dvdrip-720x432].avi',
-                      'testsmewt_bugs/movies/Baraka_Edition_Collector.avi'
+                       'testsmewt_bugs/movies/Baraka_Edition_Collector.avi'
-                      ]
+                       ]
        for f in testmovies:
-            print('-' * 80)
+            print('-'*80)
-            guess_file(f, options=options, type='movie')
+            detect_filename(f, filetype = 'movie', advanced = advanced)
-def main(args=None, setup_logging=True):
+def main():
-    if setup_logging:
+    slogging.setupLogging()
        from guessit import slogging
        slogging.setupLogging()
-    if PY2:  # pragma: no cover
+    # see http://bugs.python.org/issue2128
-        import codecs
+    if sys.version_info.major < 3 and os.name == 'nt':        
-        import locale
+        for i, a in enumerate(sys.argv):
-        import sys
+            sys.argv[i] = a.decode(locale.getpreferredencoding())
    parser = OptionParser(usage = 'usage: %prog [options] file1 [file2...]')
    parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
                      help = 'display debug output')
    parser.add_option('-i', '--info', dest = 'info', default = 'filename',
                      help = 'the desired information type: filename, hash_mpc or a hash from python\'s '
                             'hashlib module, such as hash_md5, hash_sha1, ...; or a list of any of '
                             'them, comma-separated')
    parser.add_option('-t', '--type', dest = 'filetype', default = 'autodetect',
                      help = 'the suggested file type: movie, episode or autodetect')
    parser.add_option('-a', '--advanced', dest = 'advanced', action='store_true', default = False,
                  help = 'display advanced information for filename guesses, as json output')
    parser.add_option('-d', '--demo', action='store_true', dest='demo', default=False,
                      help = 'run a few builtin tests instead of analyzing a file')
-        # see http://bugs.python.org/issue2128
+    options, args = parser.parse_args()
        if os.name == 'nt':
            for i, a in enumerate(sys.argv):
                sys.argv[i] = a.decode(locale.getpreferredencoding())
        # see https://github.com/wackou/guessit/issues/43
        # and http://stackoverflow.com/questions/4545661/unicodedecodeerror-when-redirecting-to-file
        # Wrap sys.stdout into a StreamWriter to allow writing unicode.
        sys.stdout = codecs.getwriter(locale.getpreferredencoding())(sys.stdout)
    if args:
        options, args = option_parser.parse_args(args)
    else:  # pragma: no cover
        options, args = option_parser.parse_args()
    if options.verbose:
-        logging.getLogger().setLevel(logging.DEBUG)
+        logging.getLogger('guessit').setLevel(logging.DEBUG)
    help_required = True
    if options.properties or options.values:
        display_properties(options.values, options.transformers)
        help_required = False
    elif options.transformers:
        display_transformers()
        help_required = False
    if options.demo:
-        run_demo(episodes=True, movies=True, options=vars(options))
+        run_demo(episodes=True, movies=True, advanced=options.advanced)
        help_required = False
    else:
        if args:
            help_required = False
            for filename in args:
-                guess_file(filename,
+                detect_filename(filename,
-                                info=options.info.split(','),
+                                filetype = options.filetype,
-                                options=vars(options)
+                                info = options.info.split(','),
-                                )
+                                advanced = options.advanced)
-    if help_required:  # pragma: no cover
+        else:
-        option_parser.print_help()
+            parser.print_help()
 if __name__ == '__main__':
    main()
--- a/lib/guessit/version.py
+++ b/lib/guessit/version.py
@ -1,20 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
 # Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
 # GuessIt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # Lesser GNU General Public License for more details.
 #
 # You should have received a copy of the Lesser GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 __version__ = '0.7.1'
--- a/lib/guessit/containers.py
+++ b/lib/guessit/containers.py
@ -1,615 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
 # Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 # Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
 # GuessIt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # Lesser GNU General Public License for more details.
 #
 # You should have received a copy of the Lesser GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 from __future__ import absolute_import, division, print_function, unicode_literals
 from .patterns import compile_pattern, sep
 from . import base_text_type
 from .guess import Guess
 import types
 def _get_span(prop, match):
    """Retrieves span for a match"""
    if not prop.global_span and match.re.groups:
        start = None
        end = None
        for i in range(1, match.re.groups + 1):
            span = match.span(i)
            if start is None or span[0] < start:
                start = span[0]
            if end is None or span[1] > end:
                end = span[1]
        return (start, end)
    else:
        return match.span()
        start = span[0]
        end = span[1]
 def _get_groups(compiled_re):
    """
    Retrieves groups from re
    :return: list of group names
    """
    if compiled_re.groups:
        indexgroup = {}
        for k, i in compiled_re.groupindex.items():
            indexgroup[i] = k
        ret = []
        for i in range(1, compiled_re.groups + 1):
            ret.append(indexgroup.get(i, i))
        return ret
    else:
        return [None]
 class NoValidator(object):
    """Validator that accepts every match unconditionally."""
    def validate(self, prop, string, node, match, entry_start, entry_end):
        """Return True whatever the arguments are."""
        return True
 class DefaultValidator(object):
    """Accept a match only when each of its ends touches a string boundary,
    a separator character, or another entry."""
    def validate(self, prop, string, node, match, entry_start, entry_end):
        """Return True when both sides of the match span are delimited.
        ``entry_end`` / ``entry_start`` are keyed by the positions where the
        other found entries end / start.
        """
        begin, finish = _get_span(prop, match)
        # left side: start of string, a separator just before, or an entry ending here
        left_ok = begin <= 0 or string[begin - 1] in sep or begin in entry_end
        # right side: end of string, a separator just after, or an entry starting here
        right_ok = finish >= len(string) or string[finish] in sep or finish in entry_start
        return left_ok and right_ok
 class WeakValidator(DefaultValidator):
    """Make sure our match is surrounded by separators and is the first or last element in the string"""
    def validate(self, prop, string, node, match, entry_start, entry_end):
        """Return True when the default validation passes and only separator
        characters are found before the match, or after it."""
        if not super(WeakValidator, self).validate(prop, string, node, match, entry_start, entry_end):
            return False
        first, last = match.span()
        # everything before the match must be a separator
        if all(string[pos] in sep for pos in range(first)):
            return True
        # otherwise everything from index last + 1 onwards must be a separator
        if all(string[pos] in sep for pos in range(last + 1, len(string))):
            return True
        return False
 class LeavesValidator(DefaultValidator):
    """Make sure our match is surrounded by separators and validates defined lambdas"""
    def __init__(self, lambdas=None, previous_lambdas=None, next_lambdas=None, both_side=False, default_=True):
        """
        :param lambdas: rules applied to both the previous and the next leaves
        :param previous_lambdas: rules applied to ``node.root.previous_leaves(node)``
        :param next_lambdas: rules applied to ``node.root.next_leaves(node)``
        :param both_side: if True the verdict is ``previous and next``, else ``previous or next``
        :param default_: if True the :class:`DefaultValidator` check must pass first
        """
        # Work on copies: extending the caller's own lists would leak
        # ``lambdas`` back into objects this validator does not own.
        self.previous_lambdas = list(previous_lambdas) if previous_lambdas is not None else []
        self.next_lambdas = list(next_lambdas) if next_lambdas is not None else []
        if lambdas:
            self.previous_lambdas.extend(lambdas)
            self.next_lambdas.extend(lambdas)
        self.both_side = both_side
        self.default_ = default_
    def validate(self, prop, string, node, match, entry_start, entry_end):
        """Validate the match against the separators check and the leaf rules.
        Returns the separators verdict when no rule is configured on either
        side, otherwise the combination of both sides' verdicts.
        """
        if self.default_:
            super_ret = super(LeavesValidator, self).validate(prop, string, node, match, entry_start, entry_end)
        else:
            super_ret = True
        if not super_ret:
            return False
        previous_ = self._validate_previous(prop, string, node, match, entry_start, entry_end)
        # NOTE(review): with both_side=True this accepts as soon as the previous
        # side passes, without looking at the next side - confirm this is intended.
        if previous_ and self.both_side:
            return previous_
        next_ = self._validate_next(prop, string, node, match, entry_start, entry_end)
        if previous_ is None and next_ is None:
            return super_ret
        if self.both_side:
            return previous_ and next_
        else:
            return previous_ or next_
    def _validate_previous(self, prop, string, node, match, entry_start, entry_end):
        """Return the first non-None rule verdict over the previous leaves.
        Returns False when no rule gives a verdict, None when no rule is configured.
        """
        if self.previous_lambdas:
            for leaf in node.root.previous_leaves(node):
                for lambda_ in self.previous_lambdas:
                    ret = self._check_rule(lambda_, leaf)
                    if ret is not None:
                        return ret
            return False
    def _validate_next(self, prop, string, node, match, entry_start, entry_end):
        """Return the first non-None rule verdict over the next leaves.
        Returns False when no rule gives a verdict, None when no rule is configured.
        """
        if self.next_lambdas:
            for leaf in node.root.next_leaves(node):
                for lambda_ in self.next_lambdas:
                    ret = self._check_rule(lambda_, leaf)
                    if ret is not None:
                        return ret
            return False
    def _check_rule(self, lambda_, previous_leaf):
        """Apply a single rule to a leaf and return its verdict."""
        return lambda_(previous_leaf)
 class _Property:
    """Represents a property configuration."""
    def __init__(self, keys=None, pattern=None, canonical_form=None, canonical_from_pattern=True, confidence=1.0, enhance=True, global_span=False, validator=DefaultValidator(), formatter=None):
        """
        :param keys: Keys of the property (format, screenSize, ...)
        :type keys: string or list of strings
        :param canonical_form: Unique value of the property (DVD, 720p, ...)
        :type canonical_form: string
        :param pattern: Regexp pattern; defaults to ``canonical_form``
        :type pattern: string
        :param canonical_from_pattern: use the pattern as canonical form when none is given
        :type canonical_from_pattern: boolean
        :param confidence: confidence
        :type confidence: float
        :param enhance: enhance the pattern
        :type enhance: boolean
        :param global_span: if True, the whole match span will be used to create the Guess.
                            Else, the span from the capturing groups will be used.
        :type global_span: boolean
        :param validator: Validator to use
        :type validator: :class:`DefaultValidator`
        :param formatter: Formatter to use: a callable, an object with a ``format``
                          method, or a dict of those keyed by group name
        :type formatter: function
        :raise ValueError: if no key is given and the pattern has no named group
        """
        if isinstance(keys, list):
            # Copy: named groups are appended below, and the caller's list may
            # be shared with other properties.
            self.keys = list(keys)
        elif isinstance(keys, base_text_type):
            self.keys = [keys]
        else:
            self.keys = []
        self.canonical_form = canonical_form
        if pattern is not None:
            self.pattern = pattern
        else:
            self.pattern = canonical_form
        if self.canonical_form is None and canonical_from_pattern:
            self.canonical_form = self.pattern
        self.compiled = compile_pattern(self.pattern, enhance=enhance)
        # every named group of the pattern is also a key of the property
        for group_name in _get_groups(self.compiled):
            if isinstance(group_name, base_text_type) and group_name not in self.keys:
                self.keys.append(group_name)
        if not self.keys:
            raise ValueError("No property key is defined")
        self.confidence = confidence
        self.global_span = global_span
        self.validator = validator
        self.formatter = formatter
    def format(self, value, group_name=None):
        """Retrieves the final value from re group match value"""
        if isinstance(self.formatter, dict):
            formatter = self.formatter.get(group_name)
            if formatter is None and group_name is not None:
                # fall back on the formatter registered for unnamed groups
                formatter = self.formatter.get(None)
        else:
            formatter = self.formatter
        if formatter is None:
            return value
        # Plain functions are called; so is any other callable that has no
        # ``format`` method (builtins, partials, bound methods, ...).
        if isinstance(formatter, types.FunctionType) or (callable(formatter) and not hasattr(formatter, 'format')):
            return formatter(value)
        return formatter.format(value)
    def __repr__(self):
        return "%s: %s" % (self.keys, self.canonical_form if self.canonical_form else self.pattern)
 class PropertiesContainer(object):
    """Registry of :class:`_Property` definitions that can be searched in strings.
    Keyword arguments given to the constructor are used as default parameters
    for every property created through :meth:`register_property`.
    """
    def __init__(self, **kwargs):
        self._properties = []
        self.default_property_kwargs = kwargs
    def unregister_property(self, name, *canonical_forms):
        """Unregister a property canonical forms
        If canonical_forms are specified, only those values will be unregistered
        :param name: Property name to unregister
        :type name: string
        :param canonical_forms: Values to unregister
        :type canonical_forms: varargs of string
        """
        def _is_target(prop):
            return name in prop.keys and (not canonical_forms or prop.canonical_form in canonical_forms)
        # keep everything that is not targeted, updating the list in place
        self._properties[:] = [prop for prop in self._properties if not _is_target(prop)]
    def register_property(self, name, *patterns, **property_params):
        """Register property with defined canonical form and patterns.
        :param name: name of the property (format, screenSize, ...)
        :type name: string
        :param patterns: regular expression patterns to register for the property canonical_form;
                         a dict is merged into the property parameters instead
        :type patterns: varargs of string
        :return: the created properties
        """
        properties = []
        for pattern in patterns:
            params = dict(self.default_property_kwargs)
            params.update(property_params)
            if isinstance(pattern, dict):
                params.update(pattern)
                prop = _Property(name, **params)
            else:
                prop = _Property(name, pattern, **params)
            self._properties.append(prop)
            properties.append(prop)
        return properties
    def register_canonical_properties(self, name, *canonical_forms, **property_params):
        """Register properties from their canonical forms.
        :param name: name of the property (releaseGroup, ...)
        :type name: string
        :param canonical_forms: values of the property ('ESiR', 'WAF', 'SEPTiC', ...)
        :type canonical_forms: varargs of strings
        :return: the created properties
        """
        properties = []
        for canonical_form in canonical_forms:
            params = dict(property_params)
            params['canonical_form'] = canonical_form
            # pass params (not property_params) so the canonical form is explicit
            properties.extend(self.register_property(name, canonical_form, **params))
        return properties
    def unregister_all_properties(self):
        """Unregister all defined properties"""
        # list.clear() is not available on Python 2
        del self._properties[:]
    def find_properties(self, string, node, name=None, validate=True, re_match=False, sort=True, multiple=False):
        """Find all distinct properties for given string
        If no capturing group is defined in the property, value will be grabbed from the entire match.
        If one or more unnamed capturing group is defined in the property, first capturing group will be used.
        If named capturing group are defined in the property, they will be returned as property key.
        If validate, found properties will be validated by their defined validator
        If re_match, re.match will be used instead of re.search.
        if sort, found properties will be sorted from longer match to shorter match.
        If multiple is False and multiple values are found for the same property, the more confident one will be returned.
        If multiple is False and multiple values are found for the same property and the same confidence, the longer will be returned.
        :param string: input string
        :type string: string
        :param node: current node of the matching tree
        :type node: :class:`guessit.matchtree.MatchTree`
        :param name: name of property to find
        :type name: string
        :param re_match: use re.match instead of re.search
        :type re_match: bool
        :param multiple: Allows multiple property values to be returned
        :type multiple: bool
        :return: found properties
        :rtype: list of tuples (:class:`_Property`, match)
        :see: `_Property`
        :see: `register_property`
        :see: `register_canonical_properties`
        """
        entry_start = {}
        entry_end = {}
        entries = []
        ret = []
        if not string.strip():
            return ret
        # search all properties
        for prop in self.get_properties(name):
            match = prop.compiled.match(string) if re_match else prop.compiled.search(string)
            if match:
                entries.append((prop, match))
        if validate:
            # compute entries start and ends
            for prop, match in entries:
                start, end = _get_span(prop, match)
                entry_start.setdefault(start, []).append(prop)
                entry_end.setdefault(end, []).append(prop)
            # remove invalid values; repeat until stable since removing an
            # entry changes entry_start/entry_end seen by the validators
            while True:
                invalid_entries = []
                for entry in entries:
                    prop, match = entry
                    if not prop.validator.validate(prop, string, node, match, entry_start, entry_end):
                        invalid_entries.append(entry)
                if not invalid_entries:
                    break
                for entry in invalid_entries:
                    prop, match = entry
                    entries.remove(entry)
                    start, end = _get_span(prop, match)
                    entry_start[start].remove(prop)
                    if not entry_start.get(start):
                        del entry_start[start]
                    entry_end[end].remove(prop)
                    if not entry_end.get(end):
                        del entry_end[end]
        if multiple:
            ret = entries
        else:
            # keep only best match if multiple values were found for a key
            entries_by_key = {}
            key_order = []
            for entry in entries:
                for key in entry[0].keys:
                    if key not in entries_by_key:
                        entries_by_key[key] = []
                        key_order.append(key)
                    entries_by_key[key].append(entry)
            for key in key_order:
                best_entry = None
                for entry in entries_by_key[key]:
                    if best_entry is None:
                        best_entry = entry
                        continue
                    prop, match = entry
                    best_prop, best_match = best_entry
                    # higher confidence wins; on a tie the longer match wins
                    if best_prop.confidence < prop.confidence or \
                            (best_prop.confidence == prop.confidence and
                             best_match.span()[1] - best_match.span()[0] < match.span()[1] - match.span()[0]):
                        best_entry = entry
                # an entry with several keys must be returned only once
                if not any(best_entry is kept for kept in ret):
                    ret.append(best_entry)
        if sort:
            def _sorting(x):
                _, x_match = x
                x_start, x_end = x_match.span()
                # negative length: longer matches come first
                return (x_start - x_end)
            ret.sort(key=_sorting)
        return ret
    def as_guess(self, found_properties, input=None, filter=None, sep_replacement=None, multiple=False, *args, **kwargs):
        """Build Guess objects from the entries returned by :meth:`find_properties`.
        :param found_properties: list of tuples (:class:`_Property`, match)
        :param input: string the matches were found in
        :param filter: callable taking a guess; only guesses it accepts are returned
        :param sep_replacement: replacement for separator characters in extracted values
        :param multiple: if True return the list of all accepted guesses
        :return: the first accepted guess (None when there is none), or a list if multiple
        """
        if filter is None:
            filter = lambda property, *args, **kwargs: True
        guesses = [] if multiple else None
        for property in found_properties:
            prop, match = property
            first_key = None
            for key in prop.keys:
                # First property key will be used as base for effective name
                if isinstance(key, base_text_type):
                    if first_key is None:
                        first_key = key
                        break
            property_name = first_key if first_key else None
            span = _get_span(prop, match)
            guess = Guess(confidence=prop.confidence, input=input, span=span, prop=property_name)
            groups = _get_groups(match.re)
            for group_name in groups:
                # named groups use their own name; unnamed ones use the property
                # name unless a group already carries that name
                name = group_name if isinstance(group_name, base_text_type) else property_name if property_name not in groups else None
                if name:
                    value = self._effective_prop_value(prop, group_name, input, match.span(group_name) if group_name else match.span(), sep_replacement)
                    if value is not None:
                        is_string = isinstance(value, base_text_type)
                        if not is_string or is_string and value:  # Keep non empty strings and other defined objects
                            if isinstance(value, dict):
                                for k, v in value.items():
                                    if k is None:
                                        k = name
                                    guess[k] = v
                            else:
                                guess[name] = value
                            if group_name:
                                guess.metadata(prop).span = match.span(group_name)
            if filter(guess):
                if multiple:
                    guesses.append(guess)
                else:
                    return guess
        return guesses
    def _effective_prop_value(self, prop, group_name, input=None, span=None, sep_replacement=None):
        """Return the value to store for a property match.
        The canonical form wins when the property has one. Otherwise the value is
        the ``span`` slice of ``input`` (the whole input when ``span`` is None),
        with separators optionally replaced, passed through ``prop.format``.
        Returns None when there is no input to read from.
        """
        if prop.canonical_form:
            return prop.canonical_form
        if not input:
            return None
        value = input if span is None else input[span[0]:span[1]]
        if sep_replacement:
            for sep_char in sep:
                value = value.replace(sep_char, sep_replacement)
        if value:
            value = prop.format(value, group_name)
        return value
    def get_properties(self, name=None, canonical_form=None):
        """Retrieve properties
        :param name: only yield properties having this key
        :param canonical_form: only yield properties having this canonical form
        :return: Properties
        :rtype: generator
        """
        for prop in self._properties:
            if (name is None or name in prop.keys) and (canonical_form is None or prop.canonical_form == canonical_form):
                yield prop
    def get_supported_properties(self):
        """Return a dict mapping each property key to the set of its canonical forms."""
        supported_properties = {}
        for prop in self.get_properties():
            for k in prop.keys:
                values = supported_properties.setdefault(k, set())
                if prop.canonical_form:
                    values.add(prop.canonical_form)
        return supported_properties
 class QualitiesContainer(object):
    """Registry of quality ratings, stored per property name and canonical form."""
    def __init__(self):
        self._qualities = {}
    def register_quality(self, name, canonical_form, rating):
        """Register a quality rating.
        :param name: Name of the property
        :type name: string
        :param canonical_form: Value of the property
        :type canonical_form: string
        :param rating: Estimated quality rating for the property
        :type rating: int
        """
        self._qualities.setdefault(name, {})[canonical_form] = rating
    def unregister_quality(self, name, *canonical_forms):
        """Unregister quality ratings for given property name.
        If canonical_forms are specified, only those values will be unregistered.
        Unknown names and values are ignored.
        :param name: Name of the property
        :type name: string
        :param canonical_forms: Value of the property
        :type canonical_forms: string
        """
        if not canonical_forms:
            self._qualities.pop(name, None)
            return
        property_qualities = self._qualities.get(name)
        if property_qualities is None:
            # nothing registered under this name: nothing to remove
            return
        for property_canonical_form in canonical_forms:
            property_qualities.pop(property_canonical_form, None)
        if not property_qualities:
            # drop the name once its last rating is gone
            del self._qualities[name]
    def clear_qualities(self,):
        """Unregister all defined quality ratings.
        """
        self._qualities.clear()
    def rate_quality(self, guess, *props):
        """Rate the quality of guess.
        :param guess: Guess to rate
        :type guess: :class:`guessit.guess.Guess`
        :param props: Properties to include in the rating. if empty, rating will be performed for all guess properties.
        :type props: varargs of string
        :return: Quality of the guess. The higher, the better.
        :rtype: int
        """
        rate = 0
        if not props:
            props = guess.keys()
        for prop in props:
            prop_value = guess.get(prop)
            prop_qualities = self._qualities.get(prop)
            if prop_value is not None and prop_qualities is not None:
                # values without a registered rating count for 0
                rate += prop_qualities.get(prop_value, 0)
        return rate
    def best_quality_properties(self, props, *guesses):
        """Retrieve the best quality guess, based on given properties
        :param props: Properties to include in the rating
        :type props: list of strings
        :param guesses: Guesses to rate
        :type guesses: :class:`guessit.guess.Guess`
        :return: Best quality guess from all passed guesses (the first one on a tie,
                 None when no guess is given)
        :rtype: :class:`guessit.guess.Guess`
        """
        best_guess = None
        best_rate = None
        for guess in guesses:
            rate = self.rate_quality(guess, *props)
            if best_rate is None or best_rate < rate:
                best_rate = rate
                best_guess = guess
        return best_guess
    def best_quality(self, *guesses):
        """Retrieve the best quality guess.
        :param guesses: Guesses to rate
        :type guesses: :class:`guessit.guess.Guess`
        :return: Best quality guess from all passed guesses
        :rtype: :class:`guessit.guess.Guess`
        """
        # an empty property list makes rate_quality use all guess properties
        return self.best_quality_properties((), *guesses)
--- a/lib/guessit/country.py
+++ b/lib/guessit/country.py
@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@ -18,13 +18,12 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, unicode_literals
+from __future__ import unicode_literals
 from guessit import UnicodeMixin, base_text_type, u
 from guessit.fileutils import load_file_in_same_dir
 import logging
-__all__ = ['Country']
+__all__ = [ 'Country' ]
 log = logging.getLogger(__name__)
@ -37,12 +36,12 @@ log = logging.getLogger(__name__)
 # are all separated by pipe (|) characters."
 _iso3166_contents = load_file_in_same_dir(__file__, 'ISO-3166-1_utf8.txt')
-country_matrix = [l.strip().split('|')
+country_matrix = [ l.strip().split('|')
-                   for l in _iso3166_contents.strip().split('\n')]
+                   for l in _iso3166_contents.strip().split('\n') ]
-country_matrix += [['Unknown', 'un', 'unk', '', ''],
+country_matrix += [ [ 'Unknown', 'un', 'unk', '', '' ],
-                   ['Latin America', '', 'lat', '', '']
+                    [ 'Latin America', '', 'lat', '', '' ]
-                   ]
+                    ]
 country_to_alpha3 = dict((c[0].lower(), c[2].lower()) for c in country_matrix)
 country_to_alpha3.update(dict((c[1].lower(), c[2].lower()) for c in country_matrix))
@ -50,16 +49,17 @@ country_to_alpha3.update(dict((c[2].lower(), c[2].lower()) for c in country_matr
 # add here exceptions / non ISO representations
 # Note: remember to put those exceptions in lower-case, they won't work otherwise
-country_to_alpha3.update({'latinoamérica': 'lat',
+country_to_alpha3.update({ 'latinoamérica': 'lat',
-                          'brazilian': 'bra',
+                           'brazilian': 'bra',
-                          'españa': 'esp',
+                           'españa': 'esp',
-                          'uk': 'gbr'
+                           'uk': 'gbr'
-                          })
+                           })
 country_alpha3_to_en_name = dict((c[2].lower(), c[0]) for c in country_matrix)
 country_alpha3_to_alpha2 = dict((c[2].lower(), c[1].lower()) for c in country_matrix)
 class Country(UnicodeMixin):
    """This class represents a country.
@ -78,6 +78,7 @@ class Country(UnicodeMixin):
        if self.alpha3 is None:
            self.alpha3 = 'unk'
    @property
    def alpha2(self):
        return country_alpha3_to_alpha2[self.alpha3]
--- a/lib/guessit/date.py
+++ b/lib/guessit/date.py
@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@ -18,55 +18,15 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, unicode_literals
+from __future__ import unicode_literals
 import datetime
 import re
 import math
 _dsep = r'[-/ \.]'
 _date_rexps = [re.compile(
        # 20010823
        r'[^0-9]' +
        r'(?P<year>[0-9]{4})' +
        r'(?P<month>[0-9]{2})' +
        r'(?P<day>[0-9]{2})' +
        r'[^0-9]'),
        # 2001-08-23
        re.compile(r'[^0-9]' +
        r'(?P<year>[0-9]{4})' + _dsep +
        r'(?P<month>[0-9]{2})' + _dsep +
        r'(?P<day>[0-9]{2})' +
        r'[^0-9]'),
        # 23-08-2001
        re.compile(r'[^0-9]' +
        r'(?P<day>[0-9]{2})' + _dsep +
        r'(?P<month>[0-9]{2})' + _dsep +
        r'(?P<year>[0-9]{4})' +
        r'[^0-9]'),
        # 23-08-01
        re.compile(r'[^0-9]' +
        r'(?P<day>[0-9]{2})' + _dsep +
        r'(?P<month>[0-9]{2})' + _dsep +
        r'(?P<year>[0-9]{2})' +
        r'[^0-9]'),
        ]
 def valid_year(year, today=None):
    """Tell whether ``year`` lies strictly between 1920 and five years after
    ``today`` (the current date when ``today`` is not given)."""
    reference = today if today else datetime.date.today()
    return 1920 < year < reference.year + 5
 def valid_year(year):
    """Tell whether ``year`` lies strictly between 1920 and five years after
    the current year."""
    upper_bound = datetime.date.today().year + 5
    return 1920 < year < upper_bound
 def search_year(string):
    """Looks for year patterns, and if found return the year and group span.
    Assumes there are sentinels at the beginning and end of the string that
    always allow matching a non-digit delimiting the date.
@ -74,10 +34,10 @@ def search_year(string):
    and now + 5 years, so for instance 2000 would be returned as a valid
    year but 1492 would not.
-    >>> search_year(' in the year 2000... ')
+    >>> search_year('in the year 2000...')
-    (2000, (13, 17))
+    (2000, (12, 16))
-    >>> search_year(' they arrived in 1492. ')
+    >>> search_year('they arrived in 1492.')
    (None, None)
    """
    match = re.search(r'[^0-9]([0-9]{4})[^0-9]', string)
@ -91,32 +51,59 @@ def search_year(string):
 def search_date(string):
    """Looks for date patterns, and if found return the date and group span.
    Assumes there are sentinels at the beginning and end of the string that
    always allow matching a non-digit delimiting the date.
-    Year can be defined on two digit only. It will return the nearest possible
+    >>> search_date('This happened on 2002-04-22.')
-    date from today.
+    (datetime.date(2002, 4, 22), (17, 27))
-    >>> search_date(' This happened on 2002-04-22. ')
+    >>> search_date('And this on 17-06-1998.')
-    (datetime.date(2002, 4, 22), (18, 28))
+    (datetime.date(1998, 6, 17), (12, 22))
-    >>> search_date(' And this on 17-06-1998. ')
+    >>> search_date('no date in here')
    (datetime.date(1998, 6, 17), (13, 23))
    >>> search_date(' no date in here ')
    (None, None)
    """
-    today = datetime.date.today()
+    dsep = r'[-/ \.]'
-    for drexp in _date_rexps:
+
    date_rexps = [
        # 20010823
        r'[^0-9]' +
        r'(?P<year>[0-9]{4})' +
        r'(?P<month>[0-9]{2})' +
        r'(?P<day>[0-9]{2})' +
        r'[^0-9]',
        # 2001-08-23
        r'[^0-9]' +
        r'(?P<year>[0-9]{4})' + dsep +
        r'(?P<month>[0-9]{2})' + dsep +
        r'(?P<day>[0-9]{2})' +
        r'[^0-9]',
        # 23-08-2001
        r'[^0-9]' +
        r'(?P<day>[0-9]{2})' + dsep +
        r'(?P<month>[0-9]{2})' + dsep +
        r'(?P<year>[0-9]{4})' +
        r'[^0-9]',
        # 23-08-01
        r'[^0-9]' +
        r'(?P<day>[0-9]{2})' + dsep +
        r'(?P<month>[0-9]{2})' + dsep +
        r'(?P<year>[0-9]{2})' +
        r'[^0-9]',
        ]
    for drexp in date_rexps:
        match = re.search(drexp, string)
        if match:
            d = match.groupdict()
            year, month, day = int(d['year']), int(d['month']), int(d['day'])
            # years specified as 2 digits should be adjusted here
            if year < 100:
-                if year > (today.year % 100) + 5:
+                if year > (datetime.date.today().year % 100) + 5:
                    year = 1900 + year
                else:
                    year = 2000 + year
@ -134,7 +121,7 @@ def search_date(string):
                continue
            # check date plausibility
-            if not valid_year(date.year, today=today):
+            if not 1900 < date.year < datetime.date.today().year + 5:
                continue
            # looks like we have a valid date
--- a/lib/guessit/fileutils.py
+++ b/lib/guessit/fileutils.py
@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@ -18,8 +18,7 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, unicode_literals
+from __future__ import unicode_literals
 from guessit import s, u
 import os.path
 import zipfile
@ -45,13 +44,17 @@ def split_path(path):
    result = []
    while True:
        head, tail = os.path.split(path)
        headlen = len(head)
-        if not head and not tail:
+        # on Unix systems, the root folder is '/'
-            return result
+        if head and head == '/'*headlen and tail == '':
            return ['/'] + result
-        if not tail and head == path:
+        # on Windows, the root folder is a drive letter (eg: 'C:\') or for shares \\
-            # Make sure we won't have an infinite loop.
+        if ((headlen == 3 and head[1:] == ':\\') or (headlen == 2 and head == '\\\\')) and tail == '':
-            result = [head] + result
+            return [head] + result
        if head == '' and tail == '':
            return result
        # we just split a directory ending with '/', so tail is empty
@ -67,8 +70,8 @@ def split_path(path):
 def file_in_same_dir(ref_file, desired_file):
    """Return the path for a file in the same dir as a given reference file.
-    >>> s(file_in_same_dir('~/smewt/smewt.db', 'smewt.settings')) == os.path.normpath('~/smewt/smewt.settings')
+    >>> s(file_in_same_dir('~/smewt/smewt.db', 'smewt.settings'))
-    True
+    '~/smewt/smewt.settings'
    """
    return os.path.join(*(split_path(ref_file)[:-1] + [desired_file]))
--- a/lib/guessit/guess.py
+++ b/lib/guessit/guess.py
@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@ -18,9 +18,10 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, unicode_literals
+from __future__ import unicode_literals
 from guessit import UnicodeMixin, s, u, base_text_type
 from guessit.language import Language
 from guessit.country import Country
 import json
 import datetime
 import logging
@ -28,103 +29,6 @@ import logging
 log = logging.getLogger(__name__)
 class GuessMetadata(object):
    """GuessMetadata contains confidence, an input string, span and related property.
    If defined on a property of Guess object, it overrides the object defined as global.
    Every value that is left as None on this object is looked up on ``parent``
    instead, and resolves to None when there is no parent.
    :param parent: The parent metadata, used for undefined properties in self object
    :type parent: :class:`GuessMetadata`
    :param confidence: The confidence (from 0.0 to 1.0)
    :type confidence: number
    :param input: The input string
    :type input: string
    :param span: The (start, end) indices of the match inside the input string
    :type span: tuple (int, int)
    :param prop: The found property definition
    :type prop: :class:`guessit.containers._Property`
    """
    def __init__(self, parent=None, confidence=None, input=None, span=None, prop=None, *args, **kwargs):
        # Extra positional and keyword arguments are accepted but not used.
        self.parent = parent
        # Without a parent to fall back on, a missing confidence defaults to
        # 1.0. Otherwise the given value is stored as-is; a stored None makes
        # the ``confidence`` property defer to the parent.
        if confidence is None and self.parent is None:
            self._confidence = 1.0
        else:
            self._confidence = confidence
        self._input = input
        self._span = span
        self._prop = prop
    @property
    def confidence(self):
        """The confidence
        :rtype: number
        :return: own confidence if set, else the parent's, else None
        """
        return self._confidence if not self._confidence is None else self.parent.confidence if self.parent else None
    @confidence.setter
    def confidence(self, confidence):
        # Overrides the value for this object only; the parent is not touched.
        self._confidence = confidence
    @property
    def input(self):
        """The input
        :rtype: string
        :return: String used to find this guess value (own value if set,
            else the parent's, else None)
        """
        return self._input if not self._input is None else self.parent.input if self.parent else None
    @property
    def span(self):
        """The span
        :rtype: tuple (int, int)
        :return: span of input string used to find this guess value (own
            value if set, else the parent's, else None)
        """
        return self._span if not self._span is None else self.parent.span if self.parent else None
    @span.setter
    def span(self, span):
        """Set the span on this object only.
        :param span: span of input string used to find this guess value
        :type span: tuple (int, int)
        """
        self._span = span
    @property
    def prop(self):
        """The property
        :rtype: :class:`_Property`
        :return: The property (own value if set, else the parent's, else None)
        """
        return self._prop if not self._prop is None else self.parent.prop if self.parent else None
    @property
    def raw(self):
        """Return the raw information (original match from the string,
        not the cleaned version) associated with the given property name.
        This is ``input[span[0]:span[1]]``, or None when the resolved input
        or span is missing or empty."""
        if self.input and self.span:
            return self.input[self.span[0]:self.span[1]]
        return None
    def __repr__(self, *args, **kwargs):
        # Explicitly delegate to the default object representation.
        return object.__repr__(self, *args, **kwargs)
 def _split_kwargs(**kwargs):
    """Separate metadata-related keyword arguments from the remaining ones.
    A keyword counts as metadata when its name is an attribute of the
    GuessMetadata class as listed by ``dir()`` (e.g. ``confidence``,
    ``input``, ``span``, ``prop``, ``raw``). ``parent`` is only set on
    instances, so it is not picked up here.
    :return: tuple ``(metadata_args, kwargs)`` where ``kwargs`` holds
        whatever was not moved into ``metadata_args``
    """
    metadata_args = {}
    for prop in dir(GuessMetadata):
        try:
            # pop() moves the entry out of kwargs into metadata_args
            metadata_args[prop] = kwargs.pop(prop)
        except KeyError:
            # this attribute name was not passed as a keyword; nothing to move
            pass
    return metadata_args, kwargs
 class Guess(UnicodeMixin, dict):
    """A Guess is a dictionary which has an associated confidence for each of
    its values.
@ -133,98 +37,91 @@ class Guess(UnicodeMixin, dict):
    simple dict."""
    def __init__(self, *args, **kwargs):
-        metadata_kwargs, kwargs = _split_kwargs(**kwargs)
+        try:
-        self._global_metadata = GuessMetadata(**metadata_kwargs)
+            confidence = kwargs.pop('confidence')
        except KeyError:
            confidence = 0
        try:
            raw = kwargs.pop('raw')
        except KeyError:
            raw = None
        dict.__init__(self, *args, **kwargs)
-        self._metadata = {}
+        self._confidence = {}
        self._raw = {}
        for prop in self:
-            self._metadata[prop] = GuessMetadata(parent=self._global_metadata)
+            self._confidence[prop] = confidence
-
+            self._raw[prop] = raw
    def to_dict(self, advanced=False):
        """Return the guess as a dict containing only base types, ie:
        where dates, languages, countries, etc. are converted to strings.
        if advanced is True, return the data as a json string containing
        also the raw information of the properties."""
        data = dict(self)
        for prop, value in data.items():
            if isinstance(value, datetime.date):
                data[prop] = value.isoformat()
-            elif isinstance(value, (UnicodeMixin, base_text_type)):
+            elif isinstance(value, (Language, Country, base_text_type)):
                data[prop] = u(value)
            elif isinstance(value, list):
                data[prop] = [u(x) for x in value]
            if advanced:
-                metadata = self.metadata(prop)
+                data[prop] = {"value": data[prop], "raw": self.raw(prop), "confidence": self.confidence(prop)}
                prop_data = {'value': data[prop]}
                if metadata.raw:
                    prop_data['raw'] = metadata.raw
                if metadata.confidence:
                    prop_data['confidence'] = metadata.confidence
                data[prop] = prop_data
        return data
    def nice_string(self, advanced=False):
        """Return a string with the property names and their values,
        that also displays the associated confidence to each property.
        FIXME: doc with param"""
        if advanced:
            data = self.to_dict(advanced)
            return json.dumps(data, indent=4)
-        else:
+        else:            
            data = self.to_dict()
-
+    
            parts = json.dumps(data, indent=4).split('\n')
            for i, p in enumerate(parts):
                if p[:5] != '    "':
                    continue
-
+    
                prop = p.split('"')[1]
                parts[i] = ('    [%.2f] "' % self.confidence(prop)) + p[5:]
-
+    
            return '\n'.join(parts)
    def __unicode__(self):
        return u(self.to_dict())
-    def metadata(self, prop=None):
+    def confidence(self, prop):
-        """Return the metadata associated with the given property name
+        return self._confidence.get(prop, -1)
-
+    
        If no property name is given, get the global_metadata
        """
        if prop is None:
            return self._global_metadata
        if not prop in self._metadata:
            self._metadata[prop] = GuessMetadata(parent=self._global_metadata)
        return self._metadata[prop]
    def confidence(self, prop=None):
        return self.metadata(prop).confidence
    def set_confidence(self, prop, confidence):
        self.metadata(prop).confidence = confidence
    def raw(self, prop):
-        return self.metadata(prop).raw
+        return self._raw.get(prop, None)
-    def set(self, prop_name, value, *args, **kwargs):
+    def set(self, prop, value, confidence=None, raw=None):
-        self[prop_name] = value
+        self[prop] = value
-        self._metadata[prop_name] = GuessMetadata(parent=self._global_metadata, *args, **kwargs)
+        if confidence is not None:
            self._confidence[prop] = confidence
        if raw is not None:
            self._raw[prop] = raw
-    def update(self, other, confidence=None):
+    def set_confidence(self, prop, value):
        self._confidence[prop] = value
    def set_raw(self, prop, value):
        self._raw[prop] = value
    def update(self, other, confidence=None, raw=None):
        dict.update(self, other)
        if isinstance(other, Guess):
            for prop in other:
-                try:
+                self._confidence[prop] = other.confidence(prop)
-                    self._metadata[prop] = other._metadata[prop]
+                self._raw[prop] = other.raw(prop)
-                except KeyError:
+
-                    pass
+        if confidence is not None:
        if not confidence is None:
            for prop in other:
-                self.set_confidence(prop, confidence)
+                self._confidence[prop] = confidence
        if raw is not None:
            for prop in other:
                self._raw[prop] = raw
    def update_highest_confidence(self, other):
        """Update this guess with the values from the given one. In case
@ -234,16 +131,17 @@ class Guess(UnicodeMixin, dict):
            raise ValueError('Can only call this function on Guess instances')
        for prop in other:
-            if prop in self and self.metadata(prop).confidence >= other.metadata(prop).confidence:
+            if prop in self and self.confidence(prop) >= other.confidence(prop):
                continue
            self[prop] = other[prop]
-            self._metadata[prop] = other.metadata(prop)
+            self._confidence[prop] = other.confidence(prop)
            self._raw[prop] = other.raw(prop)
 def choose_int(g1, g2):
    """Function used by merge_similar_guesses to choose between 2 possible
    properties when they are integers."""
-    v1, c1 = g1  # value, confidence
+    v1, c1 = g1 # value, confidence
    v2, c2 = g2
    if (v1 == v2):
        return (v1, 1 - (1 - c1) * (1 - c2))
@ -281,7 +179,7 @@ def choose_string(g1, g2):
    ('The Simpsons', 0.75)
    """
-    v1, c1 = g1  # value, confidence
+    v1, c1 = g1 # value, confidence
    v2, c2 = g2
    if not v1:
@ -388,48 +286,43 @@ def merge_all(guesses, append=None):
    instead of being merged.
    >>> s(merge_all([ Guess({'season': 2}, confidence=0.6),
-    ...               Guess({'episodeNumber': 13}, confidence=0.8) ])
+    ...               Guess({'episodeNumber': 13}, confidence=0.8) ]))
-    ... ) == {'season': 2, 'episodeNumber': 13}
+    {'season': 2, 'episodeNumber': 13}
    True
    >>> s(merge_all([ Guess({'episodeNumber': 27}, confidence=0.02),
-    ...               Guess({'season': 1}, confidence=0.2) ])
+    ...               Guess({'season': 1}, confidence=0.2) ]))
-    ... ) == {'season': 1}
+    {'season': 1}
    True
    >>> s(merge_all([ Guess({'other': 'PROPER'}, confidence=0.8),
    ...               Guess({'releaseGroup': '2HD'}, confidence=0.8) ],
-    ...             append=['other'])
+    ...             append=['other']))
-    ... ) == {'releaseGroup': '2HD', 'other': ['PROPER']}
+    {'releaseGroup': '2HD', 'other': ['PROPER']}
-    True
+
    """
    result = Guess()
    if not guesses:
-        return result
+        return Guess()
    result = guesses[0]
    if append is None:
        append = []
-    for g in guesses:
+    for g in guesses[1:]:
        # first append our appendable properties
        for prop in append:
            if prop in g:
                result.set(prop, result.get(prop, []) + [g[prop]],
                           # TODO: what to do with confidence here? maybe an
                           # arithmetic mean...
-                           confidence=g.metadata(prop).confidence,
+                           confidence=g.confidence(prop),
-                           input=g.metadata(prop).input,
+                           raw=g.raw(prop))
                           span=g.metadata(prop).span,
                           prop=g.metadata(prop).prop)
                del g[prop]
        # then merge the remaining ones
        dups = set(result) & set(g)
        if dups:
-            log.warning('duplicate properties %s in merged result...' % [(result[p], g[p]) for p in dups])
+            log.warning('duplicate properties %s in merged result...' % [ (result[p], g[p]) for p in dups] )
        result.update_highest_confidence(g)
@ -445,7 +338,7 @@ def merge_all(guesses, append=None):
            if isinstance(value, list):
                result[prop] = list(set(value))
            else:
-                result[prop] = [value]
+                result[prop] = [ value ]
        except KeyError:
            pass
--- a/lib/guessit/hash_ed2k.py
+++ b/lib/guessit/hash_ed2k.py
@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@ -18,8 +18,7 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, unicode_literals
+from __future__ import unicode_literals
 from guessit import s, to_hex
 import hashlib
 import os.path
@ -28,9 +27,8 @@ import os.path
 def hash_file(filename):
    """Returns the ed2k hash of a given file.
-    >>> testfile = os.path.join(os.path.dirname(__file__), 'test/dummy.srt')
+    >>> s(hash_file('tests/dummy.srt'))
-    >>> s(hash_file(testfile))
+    'ed2k://|file|dummy.srt|44|1CA0B9DED3473B926AA93A0A546138BB|/'
    'ed2k://|file|dummy.srt|59|41F58B913AB3973F593BEBA8B8DF6510|/'
    """
    return 'ed2k://|file|%s|%d|%s|/' % (os.path.basename(filename),
                                        os.path.getsize(filename),
--- a/lib/guessit/hash_mpc.py
+++ b/lib/guessit/hash_mpc.py
@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@ -18,8 +18,7 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, unicode_literals
+from __future__ import unicode_literals
 import struct
 import os
@ -29,7 +28,7 @@ def hash_file(filename):
    http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes
    and is licensed under the GPL."""
-    longlongformat = b'q'  # long long
+    longlongformat = 'q'  # long long
    bytesize = struct.calcsize(longlongformat)
    f = open(filename, "rb")
@ -40,14 +39,14 @@ def hash_file(filename):
    if filesize < 65536 * 2:
        raise Exception("SizeError: size is %d, should be > 132K..." % filesize)
-    for x in range(int(65536 / bytesize)):
+    for x in range(65536 / bytesize):
        buf = f.read(bytesize)
        (l_value,) = struct.unpack(longlongformat, buf)
        hash_value += l_value
-        hash_value = hash_value & 0xFFFFFFFFFFFFFFFF  # to remain as 64bit number
+        hash_value = hash_value & 0xFFFFFFFFFFFFFFFF #to remain as 64bit number
    f.seek(max(0, filesize - 65536), 0)
-    for x in range(int(65536 / bytesize)):
+    for x in range(65536 / bytesize):
        buf = f.read(bytesize)
        (l_value,) = struct.unpack(longlongformat, buf)
        hash_value += l_value
--- a/lib/guessit/language.py
+++ b/lib/guessit/language.py
@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@ -18,143 +18,122 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, unicode_literals
+from __future__ import unicode_literals
-
+from guessit import UnicodeMixin, base_text_type, u, s
-from guessit import UnicodeMixin, base_text_type, u
+from guessit.fileutils import load_file_in_same_dir
 from guessit.textutils import find_words
-from babelfish import Language
+from guessit.country import Country
 import babelfish
 import re
 import logging
 from guessit.guess import Guess
-__all__ = ['Language', 'UNDETERMINED',
+__all__ = [ 'is_iso_language', 'is_language', 'lang_set', 'Language',
-           'search_language', 'guess_language']
+            'ALL_LANGUAGES', 'ALL_LANGUAGES_NAMES', 'UNDETERMINED',
            'search_language', 'guess_language' ]
 log = logging.getLogger(__name__)
 UNDETERMINED = babelfish.Language('und')
-SYN = {('und', None): ['unknown', 'inconnu', 'unk', 'un'],
+# downloaded from http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
-       ('ell', None): ['gr', 'greek'],
+#
-       ('spa', None): ['esp', 'español'],
+# Description of the fields:
-       ('fra', None): ['français', 'vf', 'vff', 'vfi'],
+# "An alpha-3 (bibliographic) code, an alpha-3 (terminologic) code (when given),
-       ('swe', None): ['se'],
+# an alpha-2 code (when given), an English name, and a French name of a language
-       ('por', 'BR'): ['po', 'pb', 'pob', 'br', 'brazilian'],
+# are all separated by pipe (|) characters."
-       ('cat', None): ['català'],
+_iso639_contents = load_file_in_same_dir(__file__, 'ISO-639-2_utf-8.txt')
-       ('ces', None): ['cz'],
+
-       ('ukr', None): ['ua'],
+# drop the BOM from the beginning of the file
-       ('zho', None): ['cn'],
+_iso639_contents = _iso639_contents[1:]
-       ('jpn', None): ['jp'],
+
-       ('hrv', None): ['scr'],
+language_matrix = [ l.strip().split('|')
-       ('mul', None): ['multi', 'dl'],  # http://scenelingo.wordpress.com/2009/03/24/what-does-dl-mean/
+                    for l in _iso639_contents.strip().split('\n') ]
       }
-class GuessitConverter(babelfish.LanguageReverseConverter):
+# update information in the language matrix
 language_matrix += [['mol', '', 'mo', 'Moldavian', 'moldave'],
                    ['ass', '', '', 'Assyrian', 'assyrien']]
-    _with_country_regexp = re.compile('(.*)\((.*)\)')
+for lang in language_matrix:
-    _with_country_regexp2 = re.compile('(.*)-(.*)')
+    # remove unused languages that shadow other common ones with a non-official form
-
+    if (lang[2] == 'se' or # Northern Sami shadows Swedish
-    def __init__(self):
+        lang[2] == 'br'):  # Breton shadows Brazilian
-        self.guessit_exceptions = {}
+        lang[2] = ''
-        for (alpha3, country), synlist in SYN.items():
+    # add missing information
-            for syn in synlist:
+    if lang[0] == 'und':
-                self.guessit_exceptions[syn.lower()] = (alpha3, country, None)
+        lang[2] = 'un'
-
+    if lang[0] == 'srp':
-    @property
+        lang[1] = 'scc' # from OpenSubtitles
    def codes(self):
        return (babelfish.language_converters['alpha3b'].codes |
                babelfish.language_converters['alpha2'].codes |
                babelfish.language_converters['name'].codes |
                babelfish.language_converters['opensubtitles'].codes |
                babelfish.country_converters['name'].codes |
                frozenset(self.guessit_exceptions.keys()))
    def convert(self, alpha3, country=None, script=None):
        return str(babelfish.Language(alpha3, country, script))
    def reverse(self, name):
        with_country = (GuessitConverter._with_country_regexp.match(name) or
                        GuessitConverter._with_country_regexp2.match(name))
        if with_country:
            lang = babelfish.Language.fromguessit(with_country.group(1).strip())
            lang.country = babelfish.Country.fromguessit(with_country.group(2).strip())
            return (lang.alpha3, lang.country.alpha2 if lang.country else None, lang.script or None)
        # exceptions come first, as they need to override a potential match
        # with any of the other guessers
        try:
            return self.guessit_exceptions[name.lower()]
        except KeyError:
            pass
        for conv in [babelfish.Language,
                     babelfish.Language.fromalpha3b,
                     babelfish.Language.fromalpha2,
                     babelfish.Language.fromname,
                     babelfish.Language.fromopensubtitles]:
            try:
                c = conv(name)
                return c.alpha3, c.country, c.script
            except (ValueError, babelfish.LanguageReverseError):
                pass
        raise babelfish.LanguageReverseError(name)
-babelfish.language_converters['guessit'] = GuessitConverter()
+lng3        = frozenset(l[0] for l in language_matrix if l[0])
 lng3term    = frozenset(l[1] for l in language_matrix if l[1])
 lng2        = frozenset(l[2] for l in language_matrix if l[2])
 lng_en_name = frozenset(lng for l in language_matrix
                        for lng in l[3].lower().split('; ') if lng)
 lng_fr_name = frozenset(lng for l in language_matrix
                        for lng in l[4].lower().split('; ') if lng)
 lng_all_names = lng3 | lng3term | lng2 | lng_en_name | lng_fr_name
-COUNTRIES_SYN = {'ES': ['españa'],
+lng3_to_lng3term = dict((l[0], l[1]) for l in language_matrix if l[1])
-                 'GB': ['UK'],
+lng3term_to_lng3 = dict((l[1], l[0]) for l in language_matrix if l[1])
-                 'BR': ['brazilian', 'bra'],
+
-                 # FIXME: this one is a bit of a stretch, not sure how to do
+lng3_to_lng2 = dict((l[0], l[2]) for l in language_matrix if l[2])
-                 #        it properly, though...
+lng2_to_lng3 = dict((l[2], l[0]) for l in language_matrix if l[2])
-                 'MX': ['Latinoamérica', 'latin america']
+
-                 }
+# we only return the first given english name, hoping it is the most used one
 lng3_to_lng_en_name = dict((l[0], l[3].split('; ')[0])
                           for l in language_matrix if l[3])
 lng_en_name_to_lng3 = dict((en_name.lower(), l[0])
                           for l in language_matrix if l[3]
                           for en_name in l[3].split('; '))
 # we only return the first given french name, hoping it is the most used one
 lng3_to_lng_fr_name = dict((l[0], l[4].split('; ')[0])
                           for l in language_matrix if l[4])
 lng_fr_name_to_lng3 = dict((fr_name.lower(), l[0])
                           for l in language_matrix if l[4]
                           for fr_name in l[4].split('; '))
 # contains a list of exceptions: strings that should be parsed as a language
 # but which are not in an ISO form
 lng_exceptions = { 'unknown': ('und', None),
                   'inconnu': ('und', None),
                   'unk': ('und', None),
                   'un': ('und', None),
                   'gr': ('gre', None),
                   'greek': ('gre', None),
                   'esp': ('spa', None),
                   'español': ('spa', None),
                   'se': ('swe', None),
                   'po': ('pt', 'br'),
                   'pb': ('pt', 'br'),
                   'pob': ('pt', 'br'),
                   'br': ('pt', 'br'),
                   'brazilian': ('pt', 'br'),
                   'català': ('cat', None),
                   'cz': ('cze', None),
                   'ua': ('ukr', None),
                   'cn': ('chi', None),
                   'chs': ('chi', None),
                   'jp': ('jpn', None),
                   'scr': ('hrv', None)
                   }
-class GuessitCountryConverter(babelfish.CountryReverseConverter):
+def is_iso_language(language):
-    def __init__(self):
+    return language.lower() in lng_all_names
        self.guessit_exceptions = {}
-        for alpha2, synlist in COUNTRIES_SYN.items():
+def is_language(language):
-            for syn in synlist:
+    return is_iso_language(language) or language in lng_exceptions
                self.guessit_exceptions[syn.lower()] = alpha2
-    @property
+def lang_set(languages, strict=False):
-    def codes(self):
+    """Return a set of guessit.Language created from their given string
-        return (babelfish.country_converters['name'].codes |
+    representation.
                frozenset(babelfish.COUNTRIES.values()) |
                frozenset(self.guessit_exceptions.keys()))
-    def convert(self, alpha2):
+    if strict is True, then this will raise an exception if any language
-        return str(babelfish.Country(alpha2))
+    could not be identified.
-
+    """
-    def reverse(self, name):
+    return set(Language(l, strict=strict) for l in languages)
        # exceptions come first, as they need to override a potential match
        # with any of the other guessers
        try:
            return self.guessit_exceptions[name.lower()]
        except KeyError:
            pass
        try:
            return babelfish.Country(name.upper()).alpha2
        except ValueError:
            pass
        for conv in [babelfish.Country.fromname]:
            try:
                return conv(name).alpha2
            except babelfish.CountryReverseError:
                pass
        raise babelfish.CountryReverseError(name)
 babelfish.country_converters['guessit'] = GuessitCountryConverter()
 class Language(UnicodeMixin):
@ -174,65 +153,109 @@ class Language(UnicodeMixin):
    >>> Language('fr')
    Language(French)
-    >>> (Language('eng').english_name) == 'English'
+    >>> s(Language('eng').french_name)
    'anglais'
    >>> s(Language('pt(br)').country.english_name)
    'Brazil'
    >>> s(Language('Español (Latinoamérica)').country.english_name)
    'Latin America'
    >>> Language('Spanish (Latin America)') == Language('Español (Latinoamérica)')
    True
-    >>> (Language('pt(br)').country.name) == 'BRAZIL'
+    >>> s(Language('zz', strict=False).english_name)
-    True
+    'Undetermined'
-    >>> (Language('zz', strict=False).english_name) == 'Undetermined'
+    >>> s(Language('pt(br)').opensubtitles)
-    True
+    'pob'
    >>> (Language('pt(br)').opensubtitles) == 'pob'
    True
    """
-    def __init__(self, language, country=None, strict=False):
+    _with_country_regexp = re.compile('(.*)\((.*)\)')
    _with_country_regexp2 = re.compile('(.*)-(.*)')
    def __init__(self, language, country=None, strict=False, scheme=None):
        language = u(language.strip().lower())
-        country = babelfish.Country(country.upper()) if country else None
+        with_country = (Language._with_country_regexp.match(language) or
                        Language._with_country_regexp2.match(language))
        if with_country:
            self.lang = Language(with_country.group(1)).lang
            self.country = Country(with_country.group(2))
            return
-        try:
+        self.lang = None
-            self.lang = babelfish.Language.fromguessit(language)
+        self.country = Country(country) if country else None
            # user given country overrides guessed one
            if country:
                self.lang.country = country
-        except babelfish.LanguageReverseError:
+        # first look for scheme specific languages
-            msg = 'The given string "%s" could not be identified as a language' % language
+        if scheme == 'opensubtitles':
-            if strict:
+            if language == 'br':
-                raise ValueError(msg)
+                self.lang = 'bre'
                return
            elif language == 'se':
                self.lang = 'sme'
                return
        elif scheme is not None:
            log.warning('Unrecognized scheme: "%s" - Proceeding with standard one' % scheme)
        # look for ISO language codes
        if len(language) == 2:
            self.lang = lng2_to_lng3.get(language)
        elif len(language) == 3:
            self.lang = (language
                         if language in lng3
                         else lng3term_to_lng3.get(language))
        else:
            self.lang = (lng_en_name_to_lng3.get(language) or
                         lng_fr_name_to_lng3.get(language))
        # general language exceptions
        if self.lang is None and language in lng_exceptions:
            lang, country = lng_exceptions[language]
            self.lang = Language(lang).alpha3
            self.country = Country(country) if country else None
        msg = 'The given string "%s" could not be identified as a language' % language
        if self.lang is None and strict:
            raise ValueError(msg)
        if self.lang is None:
            log.debug(msg)
-            self.lang = UNDETERMINED
+            self.lang = 'und'
    @property
    def country(self):
        return self.lang.country
    @property
    def alpha2(self):
-        return self.lang.alpha2
+        return lng3_to_lng2[self.lang]
    @property
    def alpha3(self):
-        return self.lang.alpha3
+        return self.lang
    @property
    def alpha3term(self):
-        return self.lang.alpha3b
+        return lng3_to_lng3term[self.lang]
    @property
    def english_name(self):
-        return self.lang.name
+        return lng3_to_lng_en_name[self.lang]
    @property
    def french_name(self):
        return lng3_to_lng_fr_name[self.lang]
    @property
    def opensubtitles(self):
-        return self.lang.opensubtitles
+        if self.lang == 'por' and self.country and self.country.alpha2 == 'br':
            return 'pob'
        elif self.lang in ['gre', 'srp']:
            return self.alpha3term
        return self.alpha3
    @property
    def tmdb(self):
        if self.country:
-            return '%s-%s' % (self.alpha2, self.country.alpha2)
+            return '%s-%s' % (self.alpha2, self.country.alpha2.upper())
        return self.alpha2
    def __hash__(self):
@ -240,8 +263,7 @@ class Language(UnicodeMixin):
    def __eq__(self, other):
        if isinstance(other, Language):
-            # in Guessit, languages are considered equal if their main languages are equal
+            return self.lang == other.lang
            return self.alpha3 == other.alpha3
        if isinstance(other, base_text_type):
            try:
@ -254,138 +276,115 @@ class Language(UnicodeMixin):
    def __ne__(self, other):
        return not self == other
-    def __bool__(self):
+    def __nonzero__(self):
-        return self.lang != UNDETERMINED
+        return self.lang != 'und'
    __nonzero__ = __bool__
    def __unicode__(self):
-        if self.lang.country:
+        if self.country:
            return '%s(%s)' % (self.english_name, self.country.alpha2)
        else:
            return self.english_name
    def __repr__(self):
-        if self.lang.country:
+        if self.country:
-            return 'Language(%s, country=%s)' % (self.english_name, self.lang.country)
+            return 'Language(%s, country=%s)' % (self.english_name, self.country)
        else:
            return 'Language(%s)' % self.english_name
-# list of common words which could be interpreted as languages, but which
+UNDETERMINED = Language('und')
-# are far too common to be able to say they represent a language in the
+ALL_LANGUAGES = frozenset(Language(lng) for lng in lng_all_names) - frozenset([UNDETERMINED])
-# middle of a string (where they most likely carry their commmon meaning)
+ALL_LANGUAGES_NAMES = lng_all_names
 LNG_COMMON_WORDS = frozenset([
    # english words
    'is', 'it', 'am', 'mad', 'men', 'man', 'run', 'sin', 'st', 'to',
    'no', 'non', 'war', 'min', 'new', 'car', 'day', 'bad', 'bat', 'fan',
    'fry', 'cop', 'zen', 'gay', 'fat', 'one', 'cherokee', 'got', 'an', 'as',
    'cat', 'her', 'be', 'hat', 'sun', 'may', 'my', 'mr', 'rum', 'pi',
    # french words
    'bas', 'de', 'le', 'son', 'ne', 'ca', 'ce', 'et', 'que',
    'mal', 'est', 'vol', 'or', 'mon', 'se',
    # spanish words
    'la', 'el', 'del', 'por', 'mar',
    # other
    'ind', 'arw', 'ts', 'ii', 'bin', 'chan', 'ss', 'san', 'oss', 'iii',
    'vi', 'ben', 'da', 'lt', 'ch',
    # new from babelfish
    'mkv', 'avi', 'dmd', 'the', 'dis', 'cut', 'stv', 'des', 'dia', 'and',
    'cab', 'sub', 'mia', 'rim', 'las', 'une', 'par', 'srt', 'ano', 'toy',
    'job', 'gag', 'reel', 'www', 'for', 'ayu', 'csi', 'ren', 'moi', 'sur',
    'fer', 'fun', 'two', 'big', 'psy', 'air',
    # release groups
    'bs'  # Bosnian
    ])
-
+def search_language(string, lang_filter=None, skip=None):
 subtitle_prefixes = ['sub', 'subs', 'st', 'vost', 'subforced', 'fansub', 'hardsub']
 subtitle_suffixes = ['subforced', 'fansub', 'hardsub']
 lang_prefixes = ['true']
 def find_possible_languages(string):
    """Find possible languages in the string
    :return: list of tuple (property, Language, lang_word, word)
    """
    words = find_words(string)
    valid_words = []
    for word in words:
        lang_word = word.lower()
        key = 'language'
        for prefix in subtitle_prefixes:
            if lang_word.startswith(prefix):
                lang_word = lang_word[len(prefix):]
                key = 'subtitleLanguage'
        for suffix in subtitle_suffixes:
            if lang_word.endswith(suffix):
                lang_word = lang_word[:len(suffix)]
                key = 'subtitleLanguage'
        for prefix in lang_prefixes:
            if lang_word.startswith(prefix):
                lang_word = lang_word[len(prefix):]
        if not lang_word in LNG_COMMON_WORDS:
            try:
                lang = Language(lang_word)
                # Keep language with alpha2 equivalent. Others are probably an uncommon language.
                if lang == 'mul' or hasattr(lang, 'alpha2'):
                    valid_words.append((key, lang, lang_word, word))
            except babelfish.Error:
                pass
    return valid_words
 def search_language(string, lang_filter=None):
    """Looks for language patterns, and if found return the language object,
    its group span and an associated confidence.
    you can specify a list of allowed languages using the lang_filter argument,
    as in lang_filter = [ 'fr', 'eng', 'spanish' ]
-    >>> search_language('movie [en].avi')['language']
+    >>> search_language('movie [en].avi')
-    Language(English)
+    (Language(English), (7, 9), 0.8)
    >>> search_language('the zen fat cat and the gay mad men got a new fan', lang_filter = ['en', 'fr', 'es'])
-
+    (None, None, None)
    """
    # list of common words which could be interpreted as languages, but which
    # are far too common to be able to say they represent a language in the
    # middle of a string (where they most likely carry their common meaning)
    lng_common_words = frozenset([
        # english words
        'is', 'it', 'am', 'mad', 'men', 'man', 'run', 'sin', 'st', 'to',
        'no', 'non', 'war', 'min', 'new', 'car', 'day', 'bad', 'bat', 'fan',
        'fry', 'cop', 'zen', 'gay', 'fat', 'cherokee', 'got', 'an', 'as',
        'cat', 'her', 'be', 'hat', 'sun', 'may', 'my', 'mr', 'rum', 'pi',
        # french words
        'bas', 'de', 'le', 'son', 'vo', 'vf', 'ne', 'ca', 'ce', 'et', 'que',
        'mal', 'est', 'vol', 'or', 'mon', 'se',
        # spanish words
        'la', 'el', 'del', 'por', 'mar',
        # other
        'ind', 'arw', 'ts', 'ii', 'bin', 'chan', 'ss', 'san', 'oss', 'iii',
        'vi', 'ben', 'da', 'lt'
        ])
    sep = r'[](){} \._-+'
    if lang_filter:
-        lang_filter = set(babelfish.Language.fromguessit(lang) for lang in lang_filter)
+        lang_filter = lang_set(lang_filter)
-    confidence = 1.0  # for all of them
+    slow = ' %s ' % string.lower()
    confidence = 1.0 # for all of them
-    for prop, language, lang, word in find_possible_languages(string):
+    for lang in set(find_words(slow)) & lng_all_names:
        pos = string.find(word)
        end = pos + len(word)
-        if lang_filter and language not in lang_filter:
+        if lang in lng_common_words:
            continue
-        # only allow those languages that have a 2-letter code, those that
+        pos = slow.find(lang)
        # don't are too esoteric and probably false matches
        #if language.lang not in lng3_to_lng2:
        #    continue
-        # confidence depends on alpha2, alpha3, english name, ...
+        if pos != -1:
-        if len(lang) == 2:
+            end = pos + len(lang)
-            confidence = 0.8
+            
-        elif len(lang) == 3:
+            # skip if span is in skip list
-            confidence = 0.9
+            while skip and (pos - 1, end - 1) in skip:
-        elif prop == 'subtitleLanguage':
+                pos = slow.find(lang, end)
-            confidence = 0.6  # Subtitle prefix found with language
+                if pos == -1:
-        else:
+                    continue
-            # Note: we could either be really confident that we found a
+                end = pos + len(lang)                
-            #       language or assume that full language names are too
+            if pos == -1:
-            #       common words and lower their confidence accordingly
+                continue
-            confidence = 0.3  # going with the low-confidence route here
+                            
            # make sure our word is always surrounded by separators
            if slow[pos - 1] not in sep or slow[end] not in sep:
                continue
-        return Guess({prop: language}, confidence=confidence, input=string, span=(pos, end))
+            language = Language(slow[pos:end])
            if lang_filter and language not in lang_filter:
                continue
-    return None
+            # only allow those languages that have a 2-letter code, those that
            # don't are too esoteric and probably false matches
            if language.lang not in lng3_to_lng2:
                continue
            # confidence depends on lng2, lng3, english name, ...
            if len(lang) == 2:
                confidence = 0.8
            elif len(lang) == 3:
                confidence = 0.9
            else:
                # Note: we could either be really confident that we found a
                #       language or assume that full language names are too
                #       common words and lower their confidence accordingly
                confidence = 0.3 # going with the low-confidence route here
            return language, (pos - 1, end - 1), confidence
    return None, None, None
-def guess_language(text):  # pragma: no cover
+def guess_language(text):
    """Guess the language in which a body of text is written.
    This uses the external guess-language python module, and will fail and return
@ -393,7 +392,7 @@ def guess_language(text):  # pragma: no cover
    """
    try:
        from guess_language import guessLanguage
-        return babelfish.Language.fromguessit(guessLanguage(text))
+        return Language(guessLanguage(text))
    except ImportError:
        log.error('Cannot detect the language of the given text body, missing dependency: guess-language')
--- a/lib/guessit/matcher.py
+++ b/lib/guessit/matcher.py
@ -2,8 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
 # Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@ -19,229 +18,163 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, \
+from __future__ import unicode_literals
-    unicode_literals
+from guessit import PY3, u, base_text_type
 import logging
 from guessit import PY3, u
 from guessit.transfo import TransformerException
 from guessit.matchtree import MatchTree
 from guessit.textutils import normalize_unicode, clean_string
-from guessit.guess import Guess
+import logging
 import inspect
 log = logging.getLogger(__name__)
 class IterativeMatcher(object):
-    """An iterative matcher tries to match different patterns that appear
+    def __init__(self, filename, filetype='autodetect', opts=None, transfo_opts=None):
-    in the filename.
+        """An iterative matcher tries to match different patterns that appear
        in the filename.
-    The ``filetype`` argument indicates which type of file you want to match.
+        The 'filetype' argument indicates which type of file you want to match.
-    If it is undefined, the matcher will try to see whether it can guess
+        If it is 'autodetect', the matcher will try to see whether it can guess
-    that the file corresponds to an episode, or otherwise will assume it is
+        that the file corresponds to an episode, or otherwise will assume it is
-    a movie.
+        a movie.
-    The recognized ``filetype`` values are:
+        The recognized 'filetype' values are:
-    ``['subtitle', 'info', 'movie', 'moviesubtitle', 'movieinfo', 'episode',
+        [ autodetect, subtitle, info, movie, moviesubtitle, movieinfo, episode,
-    'episodesubtitle', 'episodeinfo']``
+        episodesubtitle, episodeinfo ]
    ``options`` is a dict of options values to be passed to the transformations used
    by the matcher.
-    The IterativeMatcher works mainly in 2 steps:
+        The IterativeMatcher works mainly in 2 steps:
-    First, it splits the filename into a match_tree, which is a tree of groups
+        First, it splits the filename into a match_tree, which is a tree of groups
-    which have a semantic meaning, such as episode number, movie title,
+        which have a semantic meaning, such as episode number, movie title,
-    etc...
+        etc...
-    The match_tree created looks like the following::
+        The match_tree created looks like the following:
-      0000000000000000000000000000000000000000000000000000000000000000000000000000000000 111
+        0000000000000000000000000000000000000000000000000000000000000000000000000000000000 111
-      0000011111111111112222222222222233333333444444444444444455555555666777777778888888 000
+        0000011111111111112222222222222233333333444444444444444455555555666777777778888888 000
-      0000000000000000000000000000000001111112011112222333333401123334000011233340000000 000
+        0000000000000000000000000000000001111112011112222333333401123334000011233340000000 000
-      __________________(The.Prestige).______.[____.HP.______.{__-___}.St{__-___}.Chaps].___
+        __________________(The.Prestige).______.[____.HP.______.{__-___}.St{__-___}.Chaps].___
-      xxxxxttttttttttttt               ffffff  vvvv    xxxxxx  ll lll     xx xxx         ccc
+        xxxxxttttttttttttt               ffffff  vvvv    xxxxxx  ll lll     xx xxx         ccc
-      [XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv
+        [XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv
-    The first 3 lines indicates the group index in which a char in the
+        The first 3 lines indicate the group index in which a char in the
-    filename is located. So for instance, ``x264`` (in the middle) is the group (0, 4, 1), and
+        filename is located. So for instance, x264 is the group (0, 4, 1), and
-    it corresponds to a video codec, denoted by the letter ``v`` in the 4th line.
+        it corresponds to a video codec, denoted by the letter 'v' in the 4th line.
-    (for more info, see guess.matchtree.to_string)
+        (for more info, see guess.matchtree.to_string)
-    Second, it tries to merge all this information into a single object
+        Second, it tries to merge all this information into a single object
-    containing all the found properties, and does some (basic) conflict
+        containing all the found properties, and does some (basic) conflict
-    resolution when they arise.
+        resolution when they arise.
-    """
+
-    def __init__(self, filename, options=None, **kwargs):
+
-        options = dict(options or {})
+        When you create the Matcher, you can pass it:
-        for k, v in kwargs.items():
+         - a list 'opts' of option names, that act as global flags
-            if k not in options or not options[k]:
+         - a dict 'transfo_opts' of { transfo_name: (transfo_args, transfo_kwargs) }
-                options[k] = v  # options dict has priority over keyword arguments
+           with which to call the transfo.process() function.
-        self._validate_options(options)
+        """
        valid_filetypes = ('autodetect', 'subtitle', 'info', 'video',
                           'movie', 'moviesubtitle', 'movieinfo',
                           'episode', 'episodesubtitle', 'episodeinfo')
        if filetype not in valid_filetypes:
            raise ValueError("filetype needs to be one of %s" % valid_filetypes)
        if not PY3 and not isinstance(filename, unicode):
            log.warning('Given filename to matcher is not unicode...')
            filename = filename.decode('utf-8')
        filename = normalize_unicode(filename)
        if opts is None:
            opts = []
        if not isinstance(opts, list):
            raise ValueError('opts must be a list of option names! Received: type=%s val=%s',
                             type(opts), opts)
        if transfo_opts is None:
            transfo_opts = {}
        if not isinstance(transfo_opts, dict):
            raise ValueError('transfo_opts must be a dict of { transfo_name: (args, kwargs) }. '+
                             'Received: type=%s val=%s', type(transfo_opts), transfo_opts)
        self.match_tree = MatchTree(filename)
        self.options = options
        self._transfo_calls = []
        # sanity check: make sure we don't process a (mostly) empty string
        if clean_string(filename) == '':
            return
-        from guessit.plugins import transformers
+        mtree = self.match_tree
        mtree.guess.set('type', filetype, confidence=1.0)
-        try:
+        def apply_transfo(transfo_name, *args, **kwargs):
-            mtree = self.match_tree
+            transfo = __import__('guessit.transfo.' + transfo_name,
-            if 'type' in self.options:
+                                 globals=globals(), locals=locals(),
-                mtree.guess.set('type', self.options['type'], confidence=0.0)
+                                 fromlist=['process'], level=0)
            default_args, default_kwargs = transfo_opts.get(transfo_name, ((), {}))
            all_args = args or default_args
            all_kwargs = dict(default_kwargs)
            all_kwargs.update(kwargs) # keep all kwargs merged together
            transfo.process(mtree, *all_args, **all_kwargs)
-            # Process
+        # 1- first split our path into dirs + basename + ext
-            for transformer in transformers.all_transformers():
+        apply_transfo('split_path_components')
                self._process(transformer, False)
-            # Post-process
+        # 2- guess the file type now (will be useful later)
-            for transformer in transformers.all_transformers():
+        apply_transfo('guess_filetype', filetype)
-                self._process(transformer, True)
+        if mtree.guess['type'] == 'unknown':
            return
-            log.debug('Found match tree:\n%s' % u(mtree))
+        # 3- split each of those into explicit groups (separated by parentheses
-        except TransformerException as e:
+        #    or square brackets)
-            log.debug('An error has occured in Transformer %s: %s' % (e.transformer, e))
+        apply_transfo('split_explicit_groups')
-    def _process(self, transformer, post=False):
+        # 4- try to match information for specific patterns
-        if not hasattr(transformer, 'should_process') or transformer.should_process(self.match_tree, self.options):
+        # NOTE: order needs to comply to the following:
-            if post:
+        #       - website before language (eg: tvu.org.ru vs russian)
-                transformer.post_process(self.match_tree, self.options)
+        #       - language before episodes_rexps
-            else:
+        #       - properties before language (eg: he-aac vs hebrew)
-                transformer.process(self.match_tree, self.options)
+        #       - release_group before properties (eg: XviD-?? vs xvid)
-                self._transfo_calls.append(transformer)
+        if mtree.guess['type'] in ('episode', 'episodesubtitle', 'episodeinfo'):
            strategy = [ 'guess_date', 'guess_website', 'guess_release_group',
                         'guess_properties', 'guess_language',
                         'guess_video_rexps',
                         'guess_episodes_rexps', 'guess_weak_episodes_rexps' ]
        else:
            strategy = [ 'guess_date', 'guess_website', 'guess_release_group',
                         'guess_properties', 'guess_language',
                         'guess_video_rexps' ]
-    @property
+        if 'nolanguage' in opts:
-    def second_pass_options(self):
+            strategy.remove('guess_language')
        second_pass_options = {}
        for transformer in self._transfo_calls:
            if hasattr(transformer, 'second_pass_options'):
                transformer_second_pass_options = transformer.second_pass_options(self.match_tree, self.options)
                if transformer_second_pass_options:
                    second_pass_options.update(transformer_second_pass_options)
        return second_pass_options
-    def _validate_options(self, options):
+        for name in strategy:
-        valid_filetypes = ('subtitle', 'info', 'video',
+            apply_transfo(name)
                   'movie', 'moviesubtitle', 'movieinfo',
                   'episode', 'episodesubtitle', 'episodeinfo')
-        type = options.get('type')
+        # more guessers for both movies and episodes
-        if type and type not in valid_filetypes:
+        apply_transfo('guess_bonus_features')
-            raise ValueError("filetype needs to be one of %s" % valid_filetypes)
+        apply_transfo('guess_year', skip_first_year=('skip_first_year' in opts))
        if 'nocountry' not in opts:
            apply_transfo('guess_country')
        apply_transfo('guess_idnumber')
        # split into '-' separated subgroups (with required separator chars
        # around the dash)
        apply_transfo('split_on_dash')
        # 5- try to identify the remaining unknown groups by looking at their
        #    position relative to other known elements
        if mtree.guess['type'] in ('episode', 'episodesubtitle', 'episodeinfo'):
            apply_transfo('guess_episode_info_from_position')
        else:
            apply_transfo('guess_movie_title_from_position')
        # 6- perform some post-processing steps
        apply_transfo('post_process')
        log.debug('Found match tree:\n%s' % u(mtree))
    def matched(self):
        return self.match_tree.matched()
 def found_property(node, name, value=None, confidence=1.0, update_guess=True, logger=None):
    # automatically retrieve the log object from the caller frame
    if not logger:
        caller_frame = inspect.stack()[1][0]
        logger = caller_frame.f_locals['self'].log
    guess = Guess({name: node.clean_value if value is None else value}, confidence=confidence)
    return found_guess(node, guess, update_guess=update_guess, logger=logger)
 def found_guess(node, guess, update_guess=True, logger=None):
    if node.guess:
        if update_guess:
            node.guess.update_highest_confidence(guess)
        else:
            child = node.add_child(guess.metadata().span)
            child.guess = guess
    else:
        node.guess = guess
    log_found_guess(guess, logger)
    return node.guess
 def log_found_guess(guess, logger=None):
    for k, v in guess.items():
        (logger or log).debug('Property found: %s=%s (confidence=%.2f)' % (k, v, guess.confidence(k)))
 class GuessFinder(object):
    def __init__(self, guess_func, confidence=None, logger=None, options=None):
        self.guess_func = guess_func
        self.confidence = confidence
        self.logger = logger or log
        self.options = options
    def process_nodes(self, nodes):
        for node in nodes:
            self.process_node(node)
    def process_node(self, node, iterative=True, partial_span=None):
        value = None
        if partial_span:
            value = node.value[partial_span[0]:partial_span[1]]
        else:
            value = node.value
        string = ' %s ' % value  # add sentinels
        if not self.options:
            matcher_result = self.guess_func(string, node)
        else:
            matcher_result = self.guess_func(string, node, self.options)
        if matcher_result:
            if not isinstance(matcher_result, Guess):
                result, span = matcher_result
            else:
                result, span = matcher_result, matcher_result.metadata().span
            if result:
                # readjust span to compensate for sentinels
                span = (span[0] - 1, span[1] - 1)
                # readjust span to compensate for partial_span
                if partial_span:
                    span = (span[0] + partial_span[0], span[1] + partial_span[0])
                partition_spans = None
                if self.options and 'skip_nodes' in self.options:
                    skip_nodes = self.options.get('skip_nodes')
                    for skip_node in skip_nodes:
                        if skip_node.parent.node_idx == node.node_idx[:len(skip_node.parent.node_idx)] and\
                            skip_node.span == span:
                            partition_spans = node.get_partition_spans(skip_node.span)
                            partition_spans.remove(skip_node.span)
                            break
                if not partition_spans:
                    # restore sentinels compensation
                    guess = None
                    if isinstance(result, Guess):
                        guess = result
                    else:
                        guess = Guess(result, confidence=self.confidence, input=string, span=span)
                    if not iterative:
                        node.guess.update(guess)
                    else:
                        absolute_span = (span[0] + node.offset, span[1] + node.offset)
                        node.partition(span)
                        found_child = None
                        for child in node.children:
                            if child.span == absolute_span:
                                found_guess(child, guess, self.logger)
                                found_child = child
                                break
                        for child in node.children:
                            if not child is found_child:
                                self.process_node(child)
                else:
                    for partition_span in partition_spans:
                        self.process_node(node, partial_span=partition_span)
--- a/lib/guessit/matchtree.py
+++ b/lib/guessit/matchtree.py
@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@ -18,14 +18,12 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, unicode_literals
+from __future__ import unicode_literals
-
+from guessit import UnicodeMixin, base_text_type, Guess
 import guessit  # @UnusedImport needed for doctests
 from guessit import UnicodeMixin, base_text_type
 from guessit.textutils import clean_string, str_fill
 from guessit.patterns import group_delimiters
 from guessit.guess import (merge_similar_guesses, merge_all,
-                           choose_int, choose_string, Guess)
+                           choose_int, choose_string)
 import copy
 import logging
@ -33,45 +31,8 @@ log = logging.getLogger(__name__)
 class BaseMatchTree(UnicodeMixin):
-    """A BaseMatchTree is a tree covering the filename, where each
+    """A MatchTree represents the hierarchical split of a string into its
-    node represents a substring in the filename and can have a ``Guess``
+    constituent semantic groups."""
    associated with it that contains the information that has been guessed
    in this node. Nodes can be further split into subnodes until a proper
    split has been found.
    Each node has the following attributes:
     - string = the original string of which this node represents a region
     - span = a pair of (begin, end) indices delimiting the substring
     - parent = parent node
     - children = list of children nodes
     - guess = Guess()
    BaseMatchTrees are displayed in the following way:
        >>> path = 'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv'
        >>> print(guessit.IterativeMatcher(path).match_tree)
        000000 1111111111111111 2222222222222222222222222222222222222222222 333
        000000 0000000000111111 0000000000111111222222222222222222222222222 000
                         011112           011112000011111222222222222222222 000
                                                         011112222222222222
                                                              0000011112222
                                                              01112    0111
        Movies/__________(____)/Dark.City.(____).DC._____.____.___.____-___.___
               tttttttttt yyyy             yyyy     fffff ssss aaa vvvv rrr ccc
        Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv
    The last line contains the filename, which you can use as a reference.
    The previous line contains the type of property that has been found.
    The line before that contains the filename, where all the found groups
    have been blanked. Basically, what is left on this line are the leftover
    groups which could not be identified.
    The lines before that indicate the indices of the groups in the tree.
    For instance, the part of the filename 'BDRip' is the leaf with index
    ``(2, 2, 1)`` (read from top to bottom), and its meaning is 'format'
    (as shown by the ``f``'s on the last-but-one line).
    """
    def __init__(self, string='', span=None, parent=None):
        self.string = string
@ -82,14 +43,10 @@ class BaseMatchTree(UnicodeMixin):
    @property
    def value(self):
        """Return the substring that this node matches."""
        return self.string[self.span[0]:self.span[1]]
    @property
    def clean_value(self):
        """Return a cleaned value of the matched substring, with better
        presentation formatting (punctuation marks removed, duplicate
        spaces, ...)"""
        return clean_string(self.value)
    @property
@ -98,8 +55,6 @@ class BaseMatchTree(UnicodeMixin):
    @property
    def info(self):
        """Return a dict containing all the info guessed by this node,
        subnodes included."""
        result = dict(self.guess)
        for c in self.children:
@ -109,7 +64,6 @@ class BaseMatchTree(UnicodeMixin):
    @property
    def root(self):
        """Return the root node of the tree."""
        if not self.parent:
            return self
@ -117,43 +71,28 @@ class BaseMatchTree(UnicodeMixin):
    @property
    def depth(self):
        """Return the depth of this node."""
        if self.is_leaf():
            return 0
        return 1 + max(c.depth for c in self.children)
    def is_leaf(self):
        """Return whether this node is a leaf or not."""
        return self.children == []
    def add_child(self, span):
        """Add a new child node to this node with the given span."""
        child = MatchTree(self.string, span=span, parent=self)
        self.children.append(child)
        return child
-    def get_partition_spans(self, indices):
+    def partition(self, indices):
        """Return the list of absolute spans for the regions of the original
        string defined by splitting this node at the given indices (relative
        to this node)"""
        indices = sorted(indices)
        if indices[0] != 0:
            indices.insert(0, 0)
        if indices[-1] != len(self.value):
            indices.append(len(self.value))
        spans = []
        for start, end in zip(indices[:-1], indices[1:]):
-            spans.append((self.offset + start,
+            self.add_child(span=(self.offset + start,
-                     self.offset + end))
+                                 self.offset + end))
        return spans
    def partition(self, indices):
        """Partition this node by splitting it at the given indices,
        relative to this node."""
        for partition_span in self.get_partition_spans(indices):
            self.add_child(span=partition_span)
    def split_on_components(self, components):
        offset = 0
@ -165,7 +104,6 @@ class BaseMatchTree(UnicodeMixin):
            offset = end
    def nodes_at_depth(self, depth):
        """Return all the nodes at a given depth in the tree"""
        if depth == 0:
            yield self
@ -175,32 +113,26 @@ class BaseMatchTree(UnicodeMixin):
    @property
    def node_idx(self):
        """Return this node's index in the tree, as a tuple.
        If this node is the root of the tree, then return ()."""
        if self.parent is None:
            return ()
        return self.parent.node_idx + (self.parent.children.index(self),)
    def node_at(self, idx):
        """Return the node at the given index in the subtree rooted at
        this node."""
        if not idx:
            return self
        try:
            return self.children[idx[0]].node_at(idx[1:])
-        except IndexError:
+        except:
            raise ValueError('Non-existent node index: %s' % (idx,))
    def nodes(self):
        """Return all the nodes and subnodes in this tree."""
        yield self
        for child in self.children:
            for node in child.nodes():
                yield node
    def _leaves(self):
        """Return a generator over all the nodes that are leaves."""
        if self.is_leaf():
            yield self
        else:
@ -209,73 +141,10 @@ class BaseMatchTree(UnicodeMixin):
                for leaf in child._leaves():
                    yield leaf
    def group_node(self):
        """Return the node at the first two elements of this node's index.

        Delegates to ``_other_group_node`` with an offset of 0, so the result
        is None when this node is less than two levels deep.
        """
        same_group = 0
        return self._other_group_node(same_group)
    def previous_group_node(self):
        """Return the sibling just before this node's group node, or None.

        Delegates to ``_other_group_node`` with an offset of -1.
        """
        one_before = -1
        return self._other_group_node(one_before)
    def next_group_node(self):
        """Return the sibling just after this node's group node, or None.

        Delegates to ``_other_group_node`` with an offset of +1.
        """
        one_after = +1
        return self._other_group_node(one_after)
    def _other_group_node(self, offset):
        """Return the depth-2 node ``offset`` positions away from this node's own.

        The first two elements of ``node_idx`` identify the "group" this node
        sits in. The second element is shifted by ``offset`` and the node at
        that index is looked up from the root. None is returned when this node
        is less than two levels deep, when the shifted position is negative,
        or when the root has no node at that index.
        """
        if len(self.node_idx) <= 1:
            return None
        parent_pos, group_pos = self.node_idx[:2]
        target_pos = group_pos + offset
        if target_pos < 0:
            return None
        try:
            return self.root.node_at((parent_pos, target_pos))
        except ValueError:
            # node_at signals a missing node with ValueError
            return None
    def leaves(self):
        """Return the leaf nodes produced by ``_leaves`` as a list."""
        return [leaf for leaf in self._leaves()]
    def previous_leaf(self, leaf):
        """Return the leaf just before ``leaf`` among this node's leaves.

        Delegates to ``_other_leaf`` with an offset of -1.
        """
        one_before = -1
        return self._other_leaf(leaf, one_before)
    def next_leaf(self, leaf):
        """Return the leaf just after ``leaf`` among this node's leaves.

        Delegates to ``_other_leaf`` with an offset of +1.
        """
        one_after = +1
        return self._other_leaf(leaf, one_after)
    def _other_leaf(self, leaf, offset):
        """Return the leaf ``offset`` positions away from ``leaf``, or None.

        ``leaf`` is looked up in ``self.leaves()``. A ValueError from
        ``list.index`` propagates if it is not one of them. None is returned
        when the shifted position falls outside the list of leaves.
        """
        leaves = self.leaves()
        index = leaves.index(leaf) + offset
        # position 0 is valid: the leaf before the second one is the first
        if 0 <= index < len(leaves):
            return leaves[index]
        return None
    def previous_leaves(self, leaf):
        """Return the leaves located before ``leaf``, nearest one first.

        The result is empty when ``leaf`` is the first leaf. A ValueError
        from ``list.index`` propagates if ``leaf`` is not one of the leaves.
        """
        all_leaves = self.leaves()
        position = all_leaves.index(leaf)
        if not 0 < position < len(all_leaves):
            return []
        # slice off everything before the leaf, then walk it backwards
        return all_leaves[:position][::-1]
    def next_leaves(self, leaf):
        """Return the leaves located after ``leaf``, in tree order.

        The result is empty when ``leaf`` is the last leaf. A ValueError from
        ``list.index`` propagates if ``leaf`` is not one of the leaves.
        """
        leaves = self.leaves()
        # index() always yields a valid position, so no range check is needed;
        # in particular the first leaf (position 0) does have following leaves
        index = leaves.index(leaf)
        return leaves[index + 1:]
    def to_string(self):
        """Return a readable string representation of this tree.
        The result is a multi-line string, where the lines are:
         - line 1 -> N-2: each line contains the nodes at the given depth in the tree
         - line N-2: original string where all the found groups have been blanked
         - line N-1: type of property that has been found
         - line N: the original string, which you can use a reference.
        """
        empty_line = ' ' * len(self.string)
        def to_hex(x):
@ -284,27 +153,23 @@ class BaseMatchTree(UnicodeMixin):
            return x
        def meaning(result):
-            mmap = {'episodeNumber': 'E',
+            mmap = { 'episodeNumber': 'E',
-                    'season': 'S',
+                     'season': 'S',
-                    'extension': 'e',
+                     'extension': 'e',
-                    'format': 'f',
+                     'format': 'f',
-                    'language': 'l',
+                     'language': 'l',
-                    'country': 'C',
+                     'country': 'C',
-                    'videoCodec': 'v',
+                     'videoCodec': 'v',
-                    'videoProfile': 'v',
+                     'audioCodec': 'a',
-                    'audioCodec': 'a',
+                     'website': 'w',
-                    'audioProfile': 'a',
+                     'container': 'c',
-                    'audioChannels': 'a',
+                     'series': 'T',
-                    'website': 'w',
+                     'title': 't',
-                    'container': 'c',
+                     'date': 'd',
-                    'series': 'T',
+                     'year': 'y',
-                    'title': 't',
+                     'releaseGroup': 'r',
-                    'date': 'd',
+                     'screenSize': 's'
-                    'year': 'y',
+                     }
                    'releaseGroup': 'r',
                    'screenSize': 's',
                    'other': 'o'
                    }
            if result is None:
                return ' '
@ -315,7 +180,7 @@ class BaseMatchTree(UnicodeMixin):
            return 'x'
-        lines = [empty_line] * (self.depth + 2)  # +2: remaining, meaning
+        lines = [ empty_line ] * (self.depth + 2) # +2: remaining, meaning
        lines[-2] = self.string
        for node in self.nodes():
@ -333,22 +198,16 @@ class BaseMatchTree(UnicodeMixin):
        lines.append(self.string)
-        return '\n'.join(l.rstrip() for l in lines)
+        return '\n'.join(lines)
    def __unicode__(self):
        """Return the text form of this tree, as built by ``to_string``."""
        rendered = self.to_string()
        return rendered
    def __repr__(self):
        """Return a short debugging label showing this node's ``value``."""
        root_value = self.value
        return '<MatchTree: root=%s>' % root_value
 class MatchTree(BaseMatchTree):
    """The MatchTree contains a few "utility" methods which are not necessary
    for the BaseMatchTree, but add a lot of convenience for writing
-    higher-level rules.
+    higher-level rules."""
    """
    _matched_result = None
    def _unidentified_leaves(self,
                             valid=lambda leaf: len(leaf.clean_value) >= 2):
@ -358,12 +217,11 @@ class MatchTree(BaseMatchTree):
    def unidentified_leaves(self,
                            valid=lambda leaf: len(leaf.clean_value) >= 2):
        """Return the leaves produced by ``_unidentified_leaves`` as a list.

        ``valid`` is forwarded unchanged. The default accepts a leaf when its
        ``clean_value`` is at least two characters long.
        """
        return [leaf for leaf in self._unidentified_leaves(valid)]
    def _leaves_containing(self, property_name):
        if isinstance(property_name, base_text_type):
-            property_name = [property_name]
+            property_name = [ property_name ]
        for leaf in self._leaves():
            for prop in property_name:
@ -372,11 +230,9 @@ class MatchTree(BaseMatchTree):
                    break
    def leaves_containing(self, property_name):
        """Return the leaves produced by ``_leaves_containing`` as a list.

        ``property_name`` is forwarded unchanged.
        """
        return [leaf for leaf in self._leaves_containing(property_name)]
    def first_leaf_containing(self, property_name):
        """Return the first leaf containing the given property."""
        try:
            return next(self._leaves_containing(property_name))
        except StopIteration:
@ -389,8 +245,6 @@ class MatchTree(BaseMatchTree):
                yield leaf
    def previous_unidentified_leaves(self, node):
        """Return the leaves produced by ``_previous_unidentified_leaves``
        for the given node, as a list."""
        return [leaf for leaf in self._previous_unidentified_leaves(node)]
    def _previous_leaves_containing(self, node, property_name):
@ -400,8 +254,6 @@ class MatchTree(BaseMatchTree):
                yield leaf
    def previous_leaves_containing(self, node, property_name):
        """Return the leaves produced by ``_previous_leaves_containing``
        for the given node and property, as a list."""
        found = self._previous_leaves_containing(node, property_name)
        return [leaf for leaf in found]
    def is_explicit(self):
@ -410,30 +262,26 @@ class MatchTree(BaseMatchTree):
        return (self.value[0] + self.value[-1]) in group_delimiters
    def matched(self):
-        """Return a single guess that contains all the info found in the
+        # we need to make a copy here, as the merge functions work in place and
-        nodes of this tree, trying to merge properties as good as possible.
+        # calling them on the match tree would modify it
-        """
+        parts = [node.guess for node in self.nodes() if node.guess]
-        if not self._matched_result:
+        parts = copy.deepcopy(parts)
            # we need to make a copy here, as the merge functions work in place and
            # calling them on the match tree would modify it
            parts = [copy.copy(node.guess) for node in self.nodes() if node.guess]
-            # 1- try to merge similar information together and give it a higher
+        # 1- try to merge similar information together and give it a higher
-            #    confidence
+        #    confidence
-            for int_part in ('year', 'season', 'episodeNumber'):
+        for int_part in ('year', 'season', 'episodeNumber'):
-                merge_similar_guesses(parts, int_part, choose_int)
+            merge_similar_guesses(parts, int_part, choose_int)
-            for string_part in ('title', 'series', 'container', 'format',
+        for string_part in ('title', 'series', 'container', 'format',
-                                'releaseGroup', 'website', 'audioCodec',
+                            'releaseGroup', 'website', 'audioCodec',
-                                'videoCodec', 'screenSize', 'episodeFormat',
+                            'videoCodec', 'screenSize', 'episodeFormat',
-                                'audioChannels', 'idNumber'):
+                            'audioChannels', 'idNumber'):
-                merge_similar_guesses(parts, string_part, choose_string)
+            merge_similar_guesses(parts, string_part, choose_string)
-            # 2- merge the rest, potentially discarding information not properly
+        # 2- merge the rest, potentially discarding information not properly
-            #    merged before
+        #    merged before
-            result = merge_all(parts,
+        result = merge_all(parts,
-                               append=['language', 'subtitleLanguage', 'other', 'special'])
+                           append=['language', 'subtitleLanguage', 'other'])
-            log.debug('Final result: ' + result.nice_string())
+        log.debug('Final result: ' + result.nice_string())
-            self._matched_result = result
+        return result
        return self._matched_result
--- a/lib/guessit/options.py
+++ b/lib/guessit/options.py
@ -1,25 +0,0 @@
 from optparse import OptionParser
 # Command-line options, registered in the order they appear in --help.
 option_parser = OptionParser(usage='usage: %prog [options] file1 [file2...]')
 def _add_switch(short_name, long_name, dest, help_text):
    """Register an on/off option that stays False unless it is given."""
    option_parser.add_option(short_name, long_name, action='store_true',
                             dest=dest, default=False, help=help_text)
 _add_switch('-v', '--verbose', 'verbose',
             'display debug output')
 _add_switch('-p', '--properties', 'properties',
             'Display properties that can be guessed.')
 _add_switch('-l', '--values', 'values',
             'Display property values that can be guessed.')
 _add_switch('-s', '--transformers', 'transformers',
             'Display transformers that can be used.')
 # options that take a value
 option_parser.add_option(
     '-i', '--info', dest='info', default='filename',
     help="the desired information type: filename, hash_mpc or a hash from python's "
          "hashlib module, such as hash_md5, hash_sha1, ...; or a list of any of "
          "them, comma-separated")
 _add_switch('-n', '--name-only', 'name_only',
             'Parse files as name only. Disable folder parsing, extension parsing, and file content analysis.')
 option_parser.add_option(
     '-t', '--type', dest='type', default=None,
     help='the suggested file type: movie, episode. If undefined, type will be guessed.')
 _add_switch('-a', '--advanced', 'advanced',
             'display advanced information for filename guesses, as json output')
 _add_switch('-y', '--yaml', 'yaml',
             'display information for filename guesses as yaml output (like unit-test)')
 _add_switch('-d', '--demo', 'demo',
             'run a few builtin tests instead of analyzing a file')
--- a/lib/guessit/patterns.py
+++ b/lib/guessit/patterns.py
@ -0,0 +1,250 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
 # Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
 # Copyright (c) 2011 Ricard Marxer <ricardmp@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
 # GuessIt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # Lesser GNU General Public License for more details.
 #
 # You should have received a copy of the Lesser GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 from __future__ import unicode_literals
 import re
 subtitle_exts = [ 'srt', 'idx', 'sub', 'ssa' ]
 info_exts = [ 'nfo' ]
 video_exts = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2',
              'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm',
              'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv']
 group_delimiters = [ '()', '[]', '{}' ]
 # separator character regexp
 sep = r'[][,)(}{+ /\._-]' # regexp art, hehe :D
 # character used to represent a deleted char (when matching groups)
 deleted = '_'
 # format: [ (regexp, confidence, span_adjust) ]
 episode_rexps = [ # ... Season 2 ...
                  (r'season (?P<season>[0-9]+)', 1.0, (0, 0)),
                  (r'saison (?P<season>[0-9]+)', 1.0, (0, 0)),
                  # ... s02e13 ...
                  (r'[Ss](?P<season>[0-9]{1,3})[^0-9]?(?P<episodeNumber>(?:-?[eE-][0-9]{1,3})+)[^0-9]', 1.0, (0, -1)),
                  # ... s03-x02 ... # FIXME: redundant? remove it?
                  #(r'[Ss](?P<season>[0-9]{1,3})[^0-9]?(?P<bonusNumber>(?:-?[xX-][0-9]{1,3})+)[^0-9]', 1.0, (0, -1)),
                  # ... 2x13 ...
                  (r'[^0-9](?P<season>[0-9]{1,2})[^0-9 .-]?(?P<episodeNumber>(?:-?[xX][0-9]{1,3})+)[^0-9]', 1.0, (1, -1)),
                  # ... s02 ...
                  #(sep + r's(?P<season>[0-9]{1,2})' + sep, 0.6, (1, -1)),
                  (r's(?P<season>[0-9]{1,2})[^0-9]', 0.6, (0, -1)),
                  # v2 or v3 for some mangas which have multiples rips
                  (r'(?P<episodeNumber>[0-9]{1,3})v[23]' + sep, 0.6, (0, 0)),
                  # ... ep 23 ...
                  ('ep' + sep + r'(?P<episodeNumber>[0-9]{1,2})[^0-9]', 0.7, (0, -1)),
                  # ... e13 ... for a mini-series without a season number
                  (sep + r'e(?P<episodeNumber>[0-9]{1,2})' + sep, 0.6, (1, -1))
                  ]
 weak_episode_rexps = [ # ... 213 or 0106 ...
                       (sep + r'(?P<episodeNumber>[0-9]{2,4})' + sep, (1, -1))
                       ]
 non_episode_title = [ 'extras', 'rip' ]
 video_rexps = [ # cd number
                (r'cd ?(?P<cdNumber>[0-9])( ?of ?(?P<cdNumberTotal>[0-9]))?', 1.0, (0, 0)),
                (r'(?P<cdNumberTotal>[1-9]) cds?', 0.9, (0, 0)),
                # special editions
                (r'edition' + sep + r'(?P<edition>collector)', 1.0, (0, 0)),
                (r'(?P<edition>collector)' + sep + 'edition', 1.0, (0, 0)),
                (r'(?P<edition>special)' + sep + 'edition', 1.0, (0, 0)),
                (r'(?P<edition>criterion)' + sep + 'edition', 1.0, (0, 0)),
                # director's cut
                (r"(?P<edition>director'?s?" + sep + "cut)", 1.0, (0, 0)),
                # video size
                (r'(?P<width>[0-9]{3,4})x(?P<height>[0-9]{3,4})', 0.9, (0, 0)),
                # website
                (r'(?P<website>www(\.[a-zA-Z0-9]+){2,3})', 0.8, (0, 0)),
                # bonusNumber: ... x01 ...
                (r'x(?P<bonusNumber>[0-9]{1,2})', 1.0, (0, 0)),
                # filmNumber: ... f01 ...
                (r'f(?P<filmNumber>[0-9]{1,2})', 1.0, (0, 0))
                ]
 websites = [ 'tvu.org.ru', 'emule-island.com', 'UsaBit.com', 'www.divx-overnet.com',
             'sharethefiles.com' ]
 unlikely_series = [ 'series' ]
 # prop_multi is a dict of { property_name: { canonical_form: [ pattern ] } }
 # each pattern is a string treated as a regexp, with the addition that every
 # dash is replaced with '[-. _]?', which matches more types of separators
 # (or none at all)
 # note: simpler patterns need to come at the end of each list so that they do
 #       not shadow more complete ones, e.g. 'AAC' needs to come after 'He-AAC',
 #       i.e. order the patterns from most specific to least specific
 prop_multi = { 'format': { 'DVD': [ 'DVD', 'DVD-Rip', 'VIDEO-TS', 'DVDivX' ],
                           'HD-DVD': [ 'HD-(?:DVD)?-Rip', 'HD-DVD' ],
                           'BluRay': [ 'Blu-ray', 'B[DR]Rip' ],
                           'HDTV': [ 'HD-TV' ],
                           'DVB': [ 'DVB-Rip', 'DVB', 'PD-TV' ],
                           'WEBRip': [ 'WEB-Rip' ],
                           'Screener': [ 'DVD-SCR', 'Screener' ],
                           'VHS': [ 'VHS' ],
                           'WEB-DL': [ 'WEB-DL' ] },
               'is3D': { True: [ '3D' ] },
               'screenSize': { '480p': [ '480[pi]?' ],
                               '720p': [ '720[pi]?' ],
                               '1080i': [ '1080i' ],
                               '1080p': [ '1080p', '1080[^i]' ] },
               'videoCodec': { 'XviD': [ 'Xvid' ],
                               'DivX': [ 'DVDivX', 'DivX' ],
                               'h264': [ '[hx]-264' ],
                               'Rv10': [ 'Rv10' ],
                               'Mpeg2': [ 'Mpeg2' ] },
               # has nothing to do here (or on filenames for that matter), but some
               # releases use it and it helps to identify release groups, so we adapt
               'videoApi': {  'DXVA': [ 'DXVA' ] },
               'audioCodec': { 'AC3': [ 'AC3' ],
                               'DTS': [ 'DTS' ],
                               'AAC': [ 'He-AAC', 'AAC-He', 'AAC' ] },
               'audioChannels': { '5.1': [ r'5\.1', 'DD5[._ ]1', '5ch' ] },
               'episodeFormat': { 'Minisode': [ 'Minisodes?' ] }
               }
 # prop_single dict of { property_name: [ canonical_form ] }
 prop_single = { 'releaseGroup': [ 'ESiR', 'WAF', 'SEPTiC', r'\[XCT\]', 'iNT', 'PUKKA',
                                  'CHD', 'ViTE', 'TLF', 'FLAiTE',
                                  'MDX', 'GM4F', 'DVL', 'SVD', 'iLUMiNADOS',
                                  'aXXo', 'KLAXXON', 'NoTV', 'ZeaL', 'LOL',
                                  'CtrlHD', 'POD', 'WiKi','IMMERSE', 'FQM',
                                  '2HD',  'CTU', 'HALCYON', 'EbP', 'SiTV',
                                  'HDBRiSe', 'AlFleNi-TeaM', 'EVOLVE', '0TV',
                                  'TLA', 'NTB', 'ASAP', 'MOMENTUM', 'FoV', 'D-Z0N3',
                                  'TrollHD', 'ECI'
                                  ],
                # potentially confusing release group names (they are words)
                'weakReleaseGroup': [ 'DEiTY', 'FiNaLe', 'UnSeeN', 'KiNGS', 'CLUE', 'DIMENSION',
                                      'SAiNTS', 'ARROW', 'EuReKA', 'SiNNERS', 'DiRTY', 'REWARD',
                                      'REPTiLE',
                                      ],
                'other': [ 'PROPER', 'REPACK', 'LIMITED', 'DualAudio', 'Audiofixed', 'R5',
                           'complete', 'classic', # not so sure about these ones, could appear in a title
                           'ws' ] # widescreen
                }
 _dash = '-'
 _psep = '[-. _]?'
 def _to_rexp(prop):
    """Compile ``prop`` into a case-insensitive regexp.

    Every occurrence of ``_dash`` in ``prop`` is first replaced with the
    optional-separator pattern ``_psep``.
    """
    relaxed = prop.replace(_dash, _psep)
    return re.compile(relaxed, re.IGNORECASE)
 # properties_rexps dict of { property_name: { canonical_form: [ rexp ] } }
 # containing the rexps compiled from both prop_multi and prop_single
 def _build_properties_rexps():
    """Compile every pattern of prop_multi and prop_single into regexps."""
    compiled = {}
    # prop_multi: each canonical form comes with its own list of patterns
    for prop_name, forms in prop_multi.items():
        compiled[prop_name] = dict((form, [_to_rexp(pattern) for pattern in patterns])
                                   for form, patterns in forms.items())
    # prop_single: the canonical form itself is the one and only pattern
    for prop_name, forms in prop_single.items():
        compiled[prop_name] = dict((form, [_to_rexp(form)]) for form in forms)
    return compiled
 properties_rexps = _build_properties_rexps()
 def find_properties(string):
    """Look for every known property value inside ``string``.

    Each compiled regexp of ``properties_rexps`` is searched once. A hit is
    kept only when it is delimited by separator characters, or by the ends of
    the string, on both sides. 'weakReleaseGroup' patterns are never searched.

    Returns a list of (property_name, canonical_form, start, end) tuples.
    """
    found = []
    for property_name, props in properties_rexps.items():
        # FIXME: this should be done in a more flexible way...
        if property_name == 'weakReleaseGroup':
            continue
        for canonical, rexps in props.items():
            for value_rexp in rexps:
                match = value_rexp.search(string)
                if match is None:
                    continue
                start, end = match.span()
                # make sure our word is always surrounded by separators
                # note: sep is a regexp, but in this case using it as
                #       a char sequence achieves the same goal
                preceded_ok = start == 0 or string[start-1] in sep
                followed_ok = end == len(string) or string[end] in sep
                if preceded_ok and followed_ok:
                    found.append((property_name, canonical, start, end))
    return found
 property_synonyms = { 'Special Edition': [ 'Special' ],
                      'Collector Edition': [ 'Collector' ],
                      'Criterion Edition': [ 'Criterion' ]
                      }
 def revert_synonyms():
    """Build the reverse lookup of ``property_synonyms``.

    Returns a dict mapping each lower-cased synonym to its canonical name.
    """
    return dict((synonym.lower(), canonical)
                for canonical, synonyms in property_synonyms.items()
                for synonym in synonyms)
 reverse_synonyms = revert_synonyms()
 def canonical_form(string):
    """Return the canonical name registered for ``string`` in
    ``reverse_synonyms`` (looked up case-insensitively), or ``string``
    itself when it is not a known synonym."""
    key = string.lower()
    if key in reverse_synonyms:
        return reverse_synonyms[key]
    return string
 def compute_canonical_form(property_name, value):
    """Return the canonical form of a property given its type if it is a valid
    one, None otherwise.

    ``value`` is tried against every regexp registered for ``property_name``
    in ``properties_rexps``; the canonical form of the first regexp matching
    at the start of ``value`` is returned. Values that are not text always
    yield None. An unknown ``property_name`` raises KeyError for text values.
    """
    # ``basestring`` only exists on Python 2; fall back to ``str`` on Python 3
    # so that the type check does not die with a NameError there
    try:
        text_types = basestring
    except NameError:
        text_types = str
    if not isinstance(value, text_types):
        return None
    for canonical, rexps in properties_rexps[property_name].items():
        for rexp in rexps:
            if rexp.match(value):
                return canonical
    return None
--- a/lib/guessit/patterns/init.py
+++ b/lib/guessit/patterns/init.py
@ -1,77 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
 # Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 # Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
 # GuessIt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # Lesser GNU General Public License for more details.
 #
 # You should have received a copy of the Lesser GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 from __future__ import absolute_import, division, print_function, unicode_literals
 import re
 from guessit import base_text_type
 group_delimiters = ['()', '[]', '{}']
 # separator character regexp
 sep = r'[][,)(}:{+ /\._-]'  # regexp art, hehe :D
 _dash = '-'
 _psep = '[\W_]?'
 def build_or_pattern(patterns):
    """Build an "or" pattern string from a list of possible patterns.

    Every pattern is wrapped in its own non-capturing group and the groups
    are joined with '|' inside an enclosing non-capturing group, e.g.
    ['a', 'b'] gives '(?:(?:a)|(?:b))'.

    An empty list gives '(?:)', a valid regexp matching the empty string.
    """
    alternatives = '|'.join('(?:%s)' % pattern for pattern in patterns)
    return '(?:' + alternatives + ')'
 def compile_pattern(pattern, enhance=True):
    """Compile a pattern, optionally enhancing it first.

    :param pattern: Pattern to compile (regexp).
    :type pattern: string
    :param enhance: Whether to run the pattern through ``enhance_pattern``
        before compiling it.
    :type enhance: bool
    :return: The compiled pattern (case-insensitive)
    :rtype: regular expression object
    """
    if enhance:
        pattern = enhance_pattern(pattern)
    return re.compile(pattern, re.IGNORECASE)
 def enhance_pattern(pattern):
    """Relax a pattern so that it matches more equivalent spellings.

    Every ``_dash`` character is replaced by the ``_psep`` pattern, which
    matches more types of separators (or none).

    :param pattern: Pattern to enhance (regexp).
    :type pattern: string
    :return: The enhanced pattern
    :rtype: string
    """
    enhanced = pattern.replace(_dash, _psep)
    return enhanced
--- a/lib/guessit/patterns/extension.py
+++ b/lib/guessit/patterns/extension.py
@ -1,32 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
 # Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 # Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
 # Copyright (c) 2011 Ricard Marxer <ricardmp@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
 # GuessIt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # Lesser GNU General Public License for more details.
 #
 # You should have received a copy of the Lesser GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 from __future__ import absolute_import, division, print_function, unicode_literals
 subtitle_exts = ['srt', 'idx', 'sub', 'ssa']
 info_exts = ['nfo']
 video_exts = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2',
              'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm',
              'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv',
              'iso']
--- a/lib/guessit/patterns/numeral.py
+++ b/lib/guessit/patterns/numeral.py
@ -1,150 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
 # Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
 # GuessIt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # Lesser GNU General Public License for more details.
 #
 # You should have received a copy of the Lesser GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 from __future__ import absolute_import, division, print_function, unicode_literals
 import re
 digital_numeral = '\d{1,3}'
 roman_numeral = "(?=[MCDLXVI]+)M{0,4}(?:CM|CD|D?C{0,3})(?:XC|XL|L?X{0,3})(?:IX|IV|V?I{0,3})"
 english_word_numeral_list = [
  'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten',
  'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen', 'twenty'
 ]
 french_word_numeral_list = [
  'zéro', 'un', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
  'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dix-sept', 'dix-huit', 'dix-neuf', 'vingt'
 ]
 french_alt_word_numeral_list = [
  'zero', 'une', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
  'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dixsept', 'dixhuit', 'dixneuf', 'vingt'
 ]
 def __build_word_numeral(*args, **kwargs):
    r"""Build a regexp alternation out of one or more word lists.

    Each positional argument is a list of words. All words are joined with
    '|', in the order given, inside one non-capturing group opened by the
    look-ahead ``(?=\w+)``. Keyword arguments are accepted but ignored.

    With no words at all the result is the bare look-ahead group.
    """
    words = [word for word_list in args for word in word_list]
    return r'(?:(?=\w+)' + '|'.join(words) + ')'
 word_numeral = __build_word_numeral(english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list)
 numeral = '(?:' + digital_numeral + '|' + roman_numeral + '|' + word_numeral + ')'
 __romanNumeralMap = (
                   ('M', 1000),
                   ('CM', 900),
                   ('D', 500),
                   ('CD', 400),
                   ('C', 100),
                   ('XC', 90),
                   ('L', 50),
                   ('XL', 40),
                   ('X', 10),
                   ('IX', 9),
                   ('V', 5),
                   ('IV', 4),
                   ('I', 1)
                   )
 __romanNumeralPattern = re.compile('^' + roman_numeral + '$')
 def __parse_roman(value):
    """Convert a Roman numeral to an integer.

    Raises ValueError when ``value`` does not match the Roman numeral pattern.
    """
    if __romanNumeralPattern.search(value) is None:
        raise ValueError('Invalid Roman numeral: %s' % value)
    total = 0
    position = 0
    # symbols are listed from largest to smallest; consume each one greedily
    for symbol, amount in __romanNumeralMap:
        width = len(symbol)
        while value.startswith(symbol, position):
            total += amount
            position += width
    return total
 def __parse_word(value):
    """Convert a word numeral to an integer.

    The English, French and alternate French word lists are searched in that
    order; the result is the position of ``value`` in the first list holding
    it. Raises ValueError when none of them does.
    """
    word_lists = [english_word_numeral_list,
                  french_word_numeral_list,
                  french_alt_word_numeral_list]
    for word_list in word_lists:
        if value in word_list:
            return word_list.index(value)
    raise ValueError
 _clean_re = re.compile('[^\d]*(\d+)[^\d]*')
 def parse_numeral(value, int_enabled=True, roman_enabled=True, word_enabled=True, clean=True):
    """Parse a numeric value into an integer.

    The input can be an integer as a string, a Roman numeral or a word. The
    enabled parsers are tried in that order and the first success wins.

    :param value: Value to parse. Can be an integer, roman numeral or word.
    :type value: string
    :param int_enabled: Try to read the value as a decimal integer.
    :type int_enabled: bool
    :param roman_enabled: Try to read the value as a Roman numeral.
    :type roman_enabled: bool
    :param word_enabled: Try to read the value as a word numeral.
    :type word_enabled: bool
    :param clean: Be lenient about surrounding text: the integer parser uses
        the first run of digits found, and the Roman and word parsers try
        each whitespace-separated word before trying the whole value.
    :type clean: bool
    :return: Numeric value
    :rtype: int
    :raises ValueError: if none of the enabled parsers can parse the value
    """
    if int_enabled:
        try:
            if clean:
                match = _clean_re.match(value)
                if match:
                    return int(match.group(1))
            return int(value)
        except ValueError:
            pass
    # the Roman and word parsers share the same strategy
    for enabled, parse in ((roman_enabled, __parse_roman),
                           (word_enabled, __parse_word)):
        if not enabled:
            continue
        if clean:
            for word in value.split():
                try:
                    return parse(word)
                except ValueError:
                    pass
        try:
            return parse(value)
        except ValueError:
            pass
    raise ValueError('Invalid numeral: ' + value)
--- a/lib/guessit/plugins/init.py
+++ b/lib/guessit/plugins/init.py
@ -1,21 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
 # Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
 # GuessIt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # Lesser GNU General Public License for more details.
 #
 # You should have received a copy of the Lesser GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 from __future__ import absolute_import, division, print_function, unicode_literals
--- a/lib/guessit/plugins/transformers.py
+++ b/lib/guessit/plugins/transformers.py
@ -1,186 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
 # Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
 # GuessIt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # Lesser GNU General Public License for more details.
 #
 # You should have received a copy of the Lesser GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 from __future__ import absolute_import, division, print_function, unicode_literals
 from stevedore import ExtensionManager
 from pkg_resources import EntryPoint
 from stevedore.extension import Extension
 from logging import getLogger
 log = getLogger(__name__)
 class Transformer(object):  # pragma: no cover
    """Base class for transformers.

    Every hook below is a neutral default (empty result, ``None``, ``True``
    or ``0``); subclasses are presumably expected to override the ones they
    need.
    """

    def __init__(self, priority=0):
        """Store *priority* and create a logger named after the transformer."""
        self.priority = priority
        # The logger is keyed on the ``name`` property, i.e. the class name
        # unless a subclass overrides ``name``.
        self.log = getLogger(self.name)

    @property
    def name(self):
        """Name of the transformer: the name of its concrete class."""
        return type(self).__name__

    def supported_properties(self):
        """Return the properties handled by this transformer (none by default)."""
        return dict()

    def second_pass_options(self, mtree, options=None):
        """Return options for a second pass; the default is ``None``."""
        return None

    def should_process(self, mtree, options=None):
        """Tell whether this transformer should run; the default is ``True``."""
        return True

    def process(self, mtree, options=None):
        """Processing hook; does nothing by default."""
        return None

    def post_process(self, mtree, options=None):
        """Post-processing hook; does nothing by default."""
        return None

    def rate_quality(self, guess, *props):
        """Return the quality rating of *guess*; the default rating is ``0``."""
        return 0
 class CustomTransformerExtensionManager(ExtensionManager):
    """Extension manager that loads transformer plugins sorted by priority.

    Loaded extensions are ordered so that the transformer with the highest
    ``priority`` comes first (see :meth:`order_extensions`).
    """

    def __init__(self, namespace='guessit.transformer', invoke_on_load=True,
                 invoke_args=(), invoke_kwds={}, propagate_map_exceptions=True,
                 on_load_failure_callback=None, verify_requirements=False):
        """Forward every argument to ``ExtensionManager.__init__``.

        This override only supplies the defaults shown in the signature.
        ``invoke_kwds`` is passed straight through and never mutated here.
        """
        super(CustomTransformerExtensionManager, self).__init__(
            namespace=namespace,
            invoke_on_load=invoke_on_load,
            invoke_args=invoke_args,
            invoke_kwds=invoke_kwds,
            propagate_map_exceptions=propagate_map_exceptions,
            on_load_failure_callback=on_load_failure_callback,
            verify_requirements=verify_requirements)

    def order_extensions(self, extensions):
        """Order the loaded transformers by descending priority.

        The list is sorted in place and returned. Priorities should be
        chosen so that the resulting order follows these rules:

           - website before language (eg: tvu.org.ru vs russian)
           - language before episodes_rexps
           - properties before language (eg: he-aac vs hebrew)
           - release_group before properties (eg: XviD-?? vs xvid)
        """
        # Negated key: highest priority first, ties keep their current order.
        extensions.sort(key=lambda ext: -ext.obj.priority)
        return extensions

    def _load_one_plugin(self, ep, invoke_on_load, invoke_args, invoke_kwds, verify_requirements):
        """Load entry point *ep* and wrap it in an ``Extension``.

        Requirements are only checked (when *verify_requirements* is true)
        for entry points attached to a distribution. The plugin is
        instantiated with *invoke_args* / *invoke_kwds* when
        *invoke_on_load* is true; otherwise the extension's object is
        ``None``.
        """
        if not ep.dist:
            # No distribution to resolve requirements against.
            plugin = ep.load(require=False)
        else:
            plugin = ep.load(require=verify_requirements)
        if invoke_on_load:
            obj = plugin(*invoke_args, **invoke_kwds)
        else:
            obj = None
        return Extension(ep.name, ep, plugin, obj)

    def _load_plugins(self, invoke_on_load, invoke_args, invoke_kwds, verify_requirements):
        """Load plugins through the base class, then order them by priority."""
        loaded = super(CustomTransformerExtensionManager, self)._load_plugins(
            invoke_on_load, invoke_args, invoke_kwds, verify_requirements)
        return self.order_extensions(loaded)

    def objects(self):
        """Return the instantiated object of every extension, via ``map``."""
        return self.map(self._get_obj)

    def _get_obj(self, ext):
        """Return the object attached to extension *ext*."""
        return ext.obj

    def object(self, name):
        """Return the object of the extension called *name*, or ``None`` if unknown."""
        try:
            return self[name].obj
        except KeyError:
            return None

    def register_module(self, name, module_name):
        """Load the plugin found at *module_name* and register it as *name*.

        The new extension is inserted according to its priority.
        """
        ep = EntryPoint(name, module_name)
        # verify_requirements is a required parameter of _load_one_plugin;
        # leaving it out raised TypeError on every call. Its value only
        # matters for entry points attached to a distribution.
        loaded = self._load_one_plugin(ep, invoke_on_load=True, invoke_args=(),
                                       invoke_kwds={}, verify_requirements=False)
        if loaded:
            self.extensions.append(loaded)
            self.extensions = self.order_extensions(self.extensions)
            # Reset the by-name lookup; presumably rebuilt lazily by the
            # base class on next access -- TODO confirm against stevedore.
            self._extensions_by_name = None
 class DefaultTransformerExtensionManager(CustomTransformerExtensionManager):
    """Manager that adds guessit's built-in transformers to the packaged ones."""

    @property
    def _internal_entry_points(self):
        """Entry-point specifications of the transformers shipped with guessit."""
        specs = [
            'split_path_components = guessit.transfo.split_path_components:SplitPathComponents',
            'guess_filetype = guessit.transfo.guess_filetype:GuessFiletype',
            'split_explicit_groups = guessit.transfo.split_explicit_groups:SplitExplicitGroups',
            'guess_date = guessit.transfo.guess_date:GuessDate',
            'guess_website = guessit.transfo.guess_website:GuessWebsite',
            'guess_release_group = guessit.transfo.guess_release_group:GuessReleaseGroup',
            'guess_properties = guessit.transfo.guess_properties:GuessProperties',
            'guess_language = guessit.transfo.guess_language:GuessLanguage',
            'guess_video_rexps = guessit.transfo.guess_video_rexps:GuessVideoRexps',
            'guess_episodes_rexps = guessit.transfo.guess_episodes_rexps:GuessEpisodesRexps',
            'guess_weak_episodes_rexps = guessit.transfo.guess_weak_episodes_rexps:GuessWeakEpisodesRexps',
            'guess_bonus_features = guessit.transfo.guess_bonus_features:GuessBonusFeatures',
            'guess_year = guessit.transfo.guess_year:GuessYear',
            'guess_country = guessit.transfo.guess_country:GuessCountry',
            'guess_idnumber = guessit.transfo.guess_idnumber:GuessIdnumber',
            'split_on_dash = guessit.transfo.split_on_dash:SplitOnDash',
            'guess_episode_info_from_position = guessit.transfo.guess_episode_info_from_position:GuessEpisodeInfoFromPosition',
            'guess_movie_title_from_position = guessit.transfo.guess_movie_title_from_position:GuessMovieTitleFromPosition',
            'guess_episode_special = guessit.transfo.guess_episode_special:GuessEpisodeSpecial',
        ]
        return specs

    def _find_entry_points(self, namespace):
        """Return the entry points for *namespace*, one per entry-point name.

        For this manager's own namespace the built-in specifications are
        parsed first; entry points reported by the parent class are applied
        afterwards and replace any built-in one with the same name.
        """
        by_name = {}
        # Internal entry points
        if namespace == self.namespace:
            for spec in self._internal_entry_points:
                parsed = EntryPoint.parse(spec)
                by_name[parsed.name] = parsed
        # Package entry points
        packaged = super(DefaultTransformerExtensionManager, self)._find_entry_points(namespace)
        by_name.update((ep.name, ep) for ep in packaged)
        return list(by_name.values())
 _extensions = None
 def all_transformers():
    """Return the transformer objects of the active extension manager."""
    manager = _extensions
    return manager.objects()
 def get_transformer(name):
    """Look up the transformer object called *name* in the active manager."""
    manager = _extensions
    return manager.object(name)
 def add_transformer(name, module_name):
    """Register the transformer at *module_name* under *name* in the active manager."""
    manager = _extensions
    manager.register_module(name, module_name)
 def reload(custom=False):
    """
    Replace the module-wide extension manager with a freshly built one.

    :param custom: when True a custom manager is created, otherwise the default one.
    The default manager loads guessit's own default extensions as well as setuptools packaging extensions.
    The custom manager skips guessit's default extensions and relies on setuptools packaging extensions only.
    :type custom: boolean
    """
    global _extensions
    manager_class = CustomTransformerExtensionManager if custom else DefaultTransformerExtensionManager
    _extensions = manager_class()
 reload()
--- a/lib/guessit/quality.py
+++ b/lib/guessit/quality.py
@ -1,65 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
 # Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
 # GuessIt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # Lesser GNU General Public License for more details.
 #
 # You should have received a copy of the Lesser GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 from __future__ import absolute_import, division, print_function, unicode_literals
 from guessit.plugins.transformers import all_transformers
 def best_quality_properties(props, *guesses):
    """Pick the guess with the best quality rating for the given properties.

    Each guess is rated by every transformer in turn; the guess that
    produced the highest single rating wins, and the first one seen is kept
    on ties.

    :param props: Properties to include in the rating
    :type props: list of strings
    :param guesses: Guesses to rate
    :type guesses: :class:`guessit.guess.Guess`

    :return: Best quality guess from all passed guesses, or None when
        nothing was rated
    :rtype: :class:`guessit.guess.Guess`
    """
    winner, top_rate = None, None
    for candidate in guesses:
        for rater in all_transformers():
            score = rater.rate_quality(candidate, *props)
            # Keep the current winner unless this score is strictly better.
            if top_rate is not None and not (top_rate < score):
                continue
            top_rate, winner = score, candidate
    return winner
 def best_quality(*guesses):
    """Pick the guess with the best quality rating.

    Each guess is rated by every transformer in turn; the guess that
    produced the highest single rating wins, and the first one seen is kept
    on ties.

    :param guesses: Guesses to rate
    :type guesses: :class:`guessit.guess.Guess`

    :return: Best quality guess from all passed guesses, or None when
        nothing was rated
    :rtype: :class:`guessit.guess.Guess`
    """
    winner, top_rate = None, None
    for candidate in guesses:
        for rater in all_transformers():
            score = rater.rate_quality(candidate)
            # Keep the current winner unless this score is strictly better.
            if top_rate is not None and not (top_rate < score):
                continue
            top_rate, winner = score, candidate
    return winner
--- a/lib/guessit/slogging.py
+++ b/lib/guessit/slogging.py
@ -1,28 +1,28 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
-# GuessIt - A library for guessing information from filenames
+# Smewt - A smart collection manager
-# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
 #
-# GuessIt is free software; you can redistribute it and/or modify it under
+# Smewt is free software; you can redistribute it and/or modify
-# the terms of the Lesser GNU General Public License as published by
+# it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
-# GuessIt is distributed in the hope that it will be useful,
+# Smewt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# Lesser GNU General Public License for more details.
+# GNU General Public License for more details.
 #
-# You should have received a copy of the Lesser GNU General Public License
+# You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, unicode_literals
+from __future__ import unicode_literals
 import logging
 import sys
-import os
+import os, os.path
 GREEN_FONT = "\x1B[0;32m"
 YELLOW_FONT = "\x1B[0;33m"
@ -31,7 +31,7 @@ RED_FONT = "\x1B[0;31m"
 RESET_FONT = "\x1B[0m"
-def setupLogging(colored=True, with_time=False, with_thread=False, filename=None, with_lineno=False):  # pragma: no cover
+def setupLogging(colored=True, with_time=False, with_thread=False, filename=None, with_lineno=False):
    """Set up a nice colored logger as the main application logger."""
    class SimpleFormatter(logging.Formatter):
--- a/lib/guessit/test/init.py
+++ b/lib/guessit/test/init.py
@ -1,26 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
 # Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
 # GuessIt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # Lesser GNU General Public License for more details.
 #
 # You should have received a copy of the Lesser GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 from __future__ import absolute_import, division, print_function, unicode_literals
 import logging
 from guessit.slogging import setupLogging
 setupLogging()
 logging.disable(logging.INFO)
--- a/lib/guessit/test/main.py
+++ b/lib/guessit/test/main.py
@ -1,40 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
 # Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
 # GuessIt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # Lesser GNU General Public License for more details.
 #
 # You should have received a copy of the Lesser GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 from __future__ import absolute_import, division, print_function, unicode_literals
 from guessit.test import (test_api, test_autodetect, test_autodetect_all, test_doctests,
                          test_episode, test_hashes, test_language, test_main,
                          test_matchtree, test_movie, test_quality, test_utils)
 from unittest import TextTestRunner
 import logging
 def main():
    """Run each guessit test suite in turn with a verbose text runner."""
    suites = [
        test_api.suite,
        test_autodetect.suite,
        test_autodetect_all.suite,
        test_doctests.suite,
        test_episode.suite,
        test_hashes.suite,
        test_language.suite,
        test_main.suite,
        test_matchtree.suite,
        test_movie.suite,
        test_quality.suite,
        test_utils.suite,
    ]
    for current_suite in suites:
        # A new runner is created for every suite.
        runner = TextTestRunner(verbosity=2)
        runner.run(current_suite)
 if __name__ == '__main__':
    main()
--- a/lib/guessit/test/autodetect.yaml
+++ b/lib/guessit/test/autodetect.yaml
@ -1,289 +0,0 @@
 ? Movies/Fear and Loathing in Las Vegas (1998)/Fear.and.Loathing.in.Las.Vegas.720p.HDDVD.DTS.x264-ESiR.mkv
 : type: movie
  title: Fear and Loathing in Las Vegas
  year: 1998
  screenSize: 720p
  format: HD-DVD
  audioCodec: DTS
  videoCodec: h264
  releaseGroup: ESiR
 ? Leopard.dmg
 : type: unknown
  extension: dmg
 ? Series/Duckman/Duckman - 101 (01) - 20021107 - I, Duckman.avi
 : type: episode
  series: Duckman
  season: 1
  episodeNumber: 1
  title: I, Duckman
  date: 2002-11-07
 ? Series/Neverwhere/Neverwhere.05.Down.Street.[tvu.org.ru].avi
 : type: episode
  series: Neverwhere
  episodeNumber: 5
  title: Down Street
  website: tvu.org.ru
 ? Neverwhere.05.Down.Street.[tvu.org.ru].avi
 : type: episode
  series: Neverwhere
  episodeNumber: 5
  title: Down Street
  website: tvu.org.ru
 ? Series/Breaking Bad/Minisodes/Breaking.Bad.(Minisodes).01.Good.Cop.Bad.Cop.WEBRip.XviD.avi
 : type: episode
  series: Breaking Bad
  episodeFormat: Minisode
  episodeNumber: 1
  title: Good Cop Bad Cop
  format: WEBRip
  videoCodec: XviD
 ? Series/Kaamelott/Kaamelott - Livre V - Ep 23 - Le Forfait.avi
 : type: episode
  series: Kaamelott
  episodeNumber: 23
  title: Le Forfait
 ? Movies/The Doors (1991)/09.03.08.The.Doors.(1991).BDRip.720p.AC3.X264-HiS@SiLUHD-English.[sharethefiles.com].mkv
 : type: movie
  title: The Doors
  year: 1991
  date: 2008-03-09
  format: BluRay
  screenSize: 720p
  audioCodec: AC3
  videoCodec: h264
  releaseGroup: HiS@SiLUHD
  language: english
  website: sharethefiles.com
 ? Movies/M.A.S.H. (1970)/MASH.(1970).[Divx.5.02][Dual-Subtitulos][DVDRip].ogm
 : type: movie
  title: M.A.S.H.
  year: 1970
  videoCodec: DivX
  format: DVD
 ? the.mentalist.501.hdtv-lol.mp4
 : type: episode
  series: The Mentalist
  season: 5
  episodeNumber: 1
  format: HDTV
  releaseGroup: LOL
 ? the.simpsons.2401.hdtv-lol.mp4
 : type: episode
  series: The Simpsons
  season: 24
  episodeNumber: 1
  format: HDTV
  releaseGroup: LOL
 ? Homeland.S02E01.HDTV.x264-EVOLVE.mp4
 : type: episode
  series: Homeland
  season: 2
  episodeNumber: 1
  format: HDTV
  videoCodec: h264
  releaseGroup: EVOLVE
 ? /media/Band_of_Brothers-e01-Currahee.mkv
 : type: episode
  series: Band of Brothers
  episodeNumber: 1
  title: Currahee
 ? /media/Band_of_Brothers-x02-We_Stand_Alone_Together.mkv
 : type: episode
  series: Band of Brothers
  bonusNumber: 2
  bonusTitle: We Stand Alone Together
 ? /movies/James_Bond-f21-Casino_Royale-x02-Stunts.mkv
 : type: movie
  title: Casino Royale
  filmSeries: James Bond
  filmNumber: 21
  bonusNumber: 2
  bonusTitle: Stunts
 ? /TV Shows/new.girl.117.hdtv-lol.mp4
 : type: episode
  series: New Girl
  season: 1
  episodeNumber: 17
  format: HDTV
  releaseGroup: LOL
 ? The.Office.(US).1x03.Health.Care.HDTV.XviD-LOL.avi
 : type: episode
  series: The Office (US)
  country: US
  season: 1
  episodeNumber: 3
  title: Health Care
  format: HDTV
  videoCodec: XviD
  releaseGroup: LOL
 ? The_Insider-(1999)-x02-60_Minutes_Interview-1996.mp4
 : type: movie
  title: The Insider
  year: 1999
  bonusNumber: 2
  bonusTitle: 60 Minutes Interview-1996
 ? OSS_117--Cairo,_Nest_of_Spies.mkv
 : type: movie
  title: OSS 117--Cairo, Nest of Spies
 ? Rush.._Beyond_The_Lighted_Stage-x09-Between_Sun_and_Moon-2002_Hartford.mkv
 : type: movie
  title: Rush Beyond The Lighted Stage
  bonusNumber: 9
  bonusTitle: Between Sun and Moon-2002 Hartford
 ? House.Hunters.International.S56E06.720p.hdtv.x264.mp4
 : type: episode
  series: House Hunters International
  season: 56
  episodeNumber: 6
  screenSize: 720p
  format: HDTV
  videoCodec: h264
 ? White.House.Down.2013.1080p.BluRay.DTS-HD.MA.5.1.x264-PublicHD.mkv
 : type: movie
  title: White House Down
  year: 2013
  screenSize: 1080p
  format: BluRay
  audioCodec: DTS
  audioProfile: HDMA
  videoCodec: h264
  releaseGroup: PublicHD
  audioChannels: "5.1"
 ? Hostages.S01E01.Pilot.for.Air.720p.WEB-DL.DD5.1.H.264-NTb.nfo
 : type: episodeinfo
  series: Hostages
  title: Pilot for Air
  season: 1
  episodeNumber: 1
  screenSize: 720p
  format: WEB-DL
  audioChannels: "5.1"
  videoCodec: h264
  audioCodec: DolbyDigital
  releaseGroup: NTb
 ? Despicable.Me.2.2013.1080p.BluRay.x264-VeDeTT.nfo
 : type: movieinfo
  title: Despicable Me 2
  year: 2013
  screenSize: 1080p
  format: BluRay
  videoCodec: h264
  releaseGroup: VeDeTT
 ? Le Cinquieme Commando 1971 SUBFORCED FRENCH DVDRiP XViD AC3 Bandix.mkv
 : type: movie
  audioCodec: AC3
  format: DVD
  releaseGroup: Bandix
  subtitleLanguage: French
  title: Le Cinquieme Commando
  videoCodec: XviD
  year: 1971
 ? Le Seigneur des Anneaux - La Communauté de l'Anneau - Version Longue - BDRip.mkv
 : type: movie
  format: BluRay
  title: Le Seigneur des Anneaux
 ? La petite bande (Michel Deville - 1983) VF PAL MP4 x264 AAC.mkv
 : type: movie
  audioCodec: AAC
  language: French
  title: La petite bande
  videoCodec: h264
  year: 1983
 ? Retour de Flammes (Gregor Schnitzler 2003) FULL DVD.iso
 : type: movie
  format: DVD
  title: Retour de Flammes
  type: movie
  year: 2003
 ? A.Common.Title.Special.2014.avi
 : type: movie
  year: 2014
  title: A Common Title Special
 ? A.Common.Title.2014.Special.avi
 : type: episode
  year: 2014
  series: A Common Title
  title: Special
  special: Special
 ? A.Common.Title.2014.Special.Edition.avi
 : type: movie
  year: 2014
  title: A Common Title
  edition: Special Edition
 ? Downton.Abbey.2013.Christmas.Special.HDTV.x264-FoV.mp4
 : type: episode
  year: 2013
  series: Downton Abbey
  title: Christmas Special
  videoCodec: h264
  releaseGroup: FoV
  format: HDTV
  special: Special
 ? Doctor_Who_2013_Christmas_Special.The_Time_of_The_Doctor.HD
 : options: -n
  type: episode
  series: Doctor Who
  other: HD
  special: Special
  title: Christmas Special The Time of The Doctor
  year: 2013
 ? Doctor Who 2005 50th Anniversary Special The Day of the Doctor 3.avi
 : type: episode
  series: Doctor Who
  special: Special
  title: 50th Anniversary Special The Day of the Doctor 3
  year: 2005
 ? Robot Chicken S06-Born Again Virgin Christmas Special HDTV x264.avi
 : type: episode
  series: Robot Chicken
  format: HDTV
  season: 6
  title: Born Again Virgin Christmas Special
  videoCodec: h264
  special: Special
 ? Wicked.Tuna.S03E00.Head.To.Tail.Special.HDTV.x264-YesTV
 : options: -n
  type: episode
  series: Wicked Tuna
  title: Head To Tail Special
  releaseGroup: YesTV
  season: 3
  episodeNumber: 0
  videoCodec: h264
  format: HDTV
  special: Special
--- a/lib/guessit/test/dummy.srt
+++ b/lib/guessit/test/dummy.srt
@ -1 +0,0 @@
 Just a dummy srt file (used for unittests: do not remove!)
--- a/lib/guessit/test/episodes.yaml
+++ b/lib/guessit/test/episodes.yaml
@ -1,569 +0,0 @@
 # Dubious tests
 #
 #? "finale "
 #: releaseGroup: FiNaLe
 #  extension: ""
 ? Series/Californication/Season 2/Californication.2x05.Vaginatown.HDTV.XviD-0TV.avi
 : series: Californication
  season: 2
  episodeNumber: 5
  title: Vaginatown
  format: HDTV
  videoCodec: XviD
  releaseGroup: 0TV
 ? Series/dexter/Dexter.5x02.Hello,.Bandit.ENG.-.sub.FR.HDTV.XviD-AlFleNi-TeaM.[tvu.org.ru].avi
 : series: Dexter
  season: 5
  episodeNumber: 2
  title: Hello, Bandit
  language: English
  subtitleLanguage: French
  format: HDTV
  videoCodec: XviD
  releaseGroup: AlFleNi-TeaM
  website: tvu.org.ru
 ? Series/Treme/Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.avi
 : series: Treme
  season: 1
  episodeNumber: 3
  title: Right Place, Wrong Time
  format: HDTV
  videoCodec: XviD
  releaseGroup: NoTV
 ? Series/Duckman/Duckman - 101 (01) - 20021107 - I, Duckman.avi
 : series: Duckman
  season: 1
  episodeNumber: 1
  title: I, Duckman
  date: 2002-11-07
 ? Series/Duckman/Duckman - S1E13 Joking The Chicken (unedited).avi
 : series: Duckman
  season: 1
  episodeNumber: 13
  title: Joking The Chicken
 ? Series/Simpsons/Saison 12 Français/Simpsons,.The.12x08.A.Bas.Le.Sergent.Skinner.FR.avi
 : series: The Simpsons
  season: 12
  episodeNumber: 8
  title: A Bas Le Sergent Skinner
  language: French
 ? Series/Futurama/Season 3 (mkv)/[™] Futurama - S03E22 - Le chef de fer à 30% ( 30 Percent Iron Chef ).mkv
 : series: Futurama
  season: 3
  episodeNumber: 22
  title: Le chef de fer à 30%
 ? Series/The Office/Season 6/The Office - S06xE01.avi
 : series: The Office
  season: 6
  episodeNumber: 1
 ? series/The Office/Season 4/The Office [401] Fun Run.avi
 : series: The Office
  season: 4
  episodeNumber: 1
  title: Fun Run
 ? Series/Mad Men Season 1 Complete/Mad.Men.S01E01.avi
 : series: Mad Men
  season: 1
  episodeNumber: 1
  other: complete
 ? series/Psych/Psych S02 Season 2 Complete English DVD/Psych.S02E02.65.Million.Years.Off.avi
 : series: Psych
  season: 2
  episodeNumber: 2
  title: 65 Million Years Off
  language: english
  format: DVD
  other: complete
 ? series/Psych/Psych S02 Season 2 Complete English DVD/Psych.S02E03.Psy.Vs.Psy.Français.srt
 : series: Psych
  season: 2
  episodeNumber: 3
  title: Psy Vs Psy
  format: DVD
  language: English
  subtitleLanguage: French
  other: complete
 ? Series/Pure Laine/Pure.Laine.1x01.Toutes.Couleurs.Unies.FR.(Québec).DVB-Kceb.[tvu.org.ru].avi
 : series: Pure Laine
  season: 1
  episodeNumber: 1
  title: Toutes Couleurs Unies
  format: DVB
  releaseGroup: Kceb
  language: french
  website: tvu.org.ru
 ? Series/Pure Laine/2x05 - Pure Laine - Je Me Souviens.avi
 : series: Pure Laine
  season: 2
  episodeNumber: 5
  title: Je Me Souviens
 ? Series/Tout sur moi/Tout sur moi - S02E02 - Ménage à trois (14-01-2008) [Rip by Ampli].avi
 : series: Tout sur moi
  season: 2
  episodeNumber: 2
  title: Ménage à trois
  date: 2008-01-14
 ? The.Mentalist.2x21.18-5-4.ENG.-.sub.FR.HDTV.XviD-AlFleNi-TeaM.[tvu.org.ru].avi
 : series: The Mentalist
  season: 2
  episodeNumber: 21
  title: 18-5-4
  language: english
  subtitleLanguage: french
  format: HDTV
  videoCodec: Xvid
  releaseGroup: AlFleNi-TeaM
  website: tvu.org.ru
 ? series/__ Incomplete __/Dr Slump (Catalan)/Dr._Slump_-_003_DVB-Rip_Catalan_by_kelf.avi
 : series: Dr Slump
  episodeNumber: 3
  format: DVB
  language: catalan
 ? series/Ren and Stimpy - Black_hole_[DivX].avi
 : series: Ren and Stimpy
  title: Black hole
  videoCodec: DivX
 ? Series/Walt Disney/Donald.Duck.-.Good.Scouts.[www.bigernie.jump.to].avi
 : series: Donald Duck
  title: Good Scouts
  website: www.bigernie.jump.to
 ? Series/Neverwhere/Neverwhere.05.Down.Street.[tvu.org.ru].avi
 : series: Neverwhere
  episodeNumber: 5
  title: Down Street
  website: tvu.org.ru
 ? Series/South Park/Season 4/South.Park.4x07.Cherokee.Hair.Tampons.DVDRip.[tvu.org.ru].avi
 : series: South Park
  season: 4
  episodeNumber: 7
  title: Cherokee Hair Tampons
  format: DVD
  website: tvu.org.ru
 ? Series/Kaamelott/Kaamelott - Livre V - Ep 23 - Le Forfait.avi
 : series: Kaamelott
  episodeNumber: 23
  title: Le Forfait
 ? Series/Duckman/Duckman - 110 (10) - 20021218 - Cellar Beware.avi
 : series: Duckman
  season: 1
  episodeNumber: 10
  date: 2002-12-18
  title: Cellar Beware
 ? Series/Ren & Stimpy/Ren And Stimpy - Onward & Upward-Adult Party Cartoon.avi
 : series: Ren And Stimpy
  title: Onward & Upward-Adult Party Cartoon
 ? Series/Breaking Bad/Minisodes/Breaking.Bad.(Minisodes).01.Good.Cop.Bad.Cop.WEBRip.XviD.avi
 : series: Breaking Bad
  episodeFormat: Minisode
  episodeNumber: 1
  title: Good Cop Bad Cop
  format: WEBRip
  videoCodec: XviD
 ? Series/My Name Is Earl/My.Name.Is.Earl.S01Extras.-.Bad.Karma.DVDRip.XviD.avi
 : series: My Name Is Earl
  season: 1
  title: Bad Karma
  format: DVD
  special: Extras
  videoCodec: XviD
 ? /mnt/series/The Big Bang Theory/S01/The.Big.Bang.Theory.S01E01.mkv
 : series: The Big Bang Theory
  season: 1
  episodeNumber: 1
 ? /media/Parks_and_Recreation-s03-e01.mkv
 : series: Parks and Recreation
  season: 3
  episodeNumber: 1
 ? /media/Parks_and_Recreation-s03-e02-Flu_Season.mkv
 : series: Parks and Recreation
  season: 3
  title: Flu Season
  episodeNumber: 2
 ? /media/Parks_and_Recreation-s03-x01.mkv
 : series: Parks and Recreation
  season: 3
  bonusNumber: 1
 ? /media/Parks_and_Recreation-s03-x02-Gag_Reel.mkv
 : series: Parks and Recreation
  season: 3
  bonusNumber: 2
  bonusTitle: Gag Reel
 ? /media/Band_of_Brothers-e01-Currahee.mkv
 : series: Band of Brothers
  episodeNumber: 1
  title: Currahee
 ? /media/Band_of_Brothers-x02-We_Stand_Alone_Together.mkv
 : series: Band of Brothers
  bonusNumber: 2
  bonusTitle: We Stand Alone Together
 ? /TV Shows/Mad.M-5x9.mkv
 : series: Mad M
  season: 5
  episodeNumber: 9
 ? /TV Shows/new.girl.117.hdtv-lol.mp4
 : series: New Girl
  season: 1
  episodeNumber: 17
  format: HDTV
  releaseGroup: LOL
 ? Kaamelott - 5x44x45x46x47x48x49x50.avi
 : series: Kaamelott
  season: 5
  episodeNumber: 44
  episodeList: [44, 45, 46, 47, 48, 49, 50]
 ? Example S01E01-02.avi
 : series: Example
  season: 1
  episodeNumber: 1
  episodeList: [1, 2]
 ? Example S01E01E02.avi
 : series: Example
  season: 1
  episodeNumber: 1
  episodeList: [1, 2]
 ? Series/Baccano!/Baccano!_-_T1_-_Trailer_-_[Ayu](dae8173e).mkv
 : series: Baccano!
  other: Trailer
 ? Series/Doctor Who (2005)/Season 06/Doctor Who (2005) - S06E01 - The Impossible Astronaut (1).avi
 : series: Doctor Who
  year: 2005
  season: 6
  episodeNumber: 1
  title: The Impossible Astronaut
 ? The.Office.(US).1x03.Health.Care.HDTV.XviD-LOL.avi
 : series: The Office (US)
  country: US
  season: 1
  episodeNumber: 3
  title: Health Care
  format: HDTV
  videoCodec: XviD
  releaseGroup: LOL
 ? /Volumes/data-1/Series/Futurama/Season 3/Futurama_-_S03_DVD_Bonus_-_Deleted_Scenes_Part_3.ogm
 : series: Futurama
  season: 3
  other: Bonus
  title: Deleted Scenes Part 3
  format: DVD
 ? Ben.and.Kate.S01E02.720p.HDTV.X264-DIMENSION.mkv
 : series: Ben and Kate
  season: 1
  episodeNumber: 2
  screenSize: 720p
  format: HDTV
  videoCodec: h264
  releaseGroup: DIMENSION
 ? /volume1/TV Series/Drawn Together/Season 1/Drawn Together 1x04 Requiem for a Reality Show.avi
 : series: Drawn Together
  season: 1
  episodeNumber: 4
  title: Requiem for a Reality Show
 ? Sons.of.Anarchy.S05E06.720p.WEB.DL.DD5.1.H.264-CtrlHD.mkv
 : series: Sons of Anarchy
  season: 5
  episodeNumber: 6
  screenSize: 720p
  format: WEB-DL
  audioChannels: "5.1"
  audioCodec: DolbyDigital
  videoCodec: h264
  releaseGroup: CtrlHD
 ? /media/bdc64bfe-e36f-4af8-b550-e6fd2dfaa507/TV_Shows/Doctor Who (2005)/Saison 6/Doctor Who (2005) - S06E13 - The Wedding of River Song.mkv
 : series: Doctor Who
  season: 6
  episodeNumber: 13
  year: 2005
  title: The Wedding of River Song
  idNumber: bdc64bfe-e36f-4af8-b550-e6fd2dfaa507
 ? /mnt/videos/tvshows/Doctor Who/Season 06/E13 - The Wedding of River Song.mkv
 : series: Doctor Who
  season: 6
  episodeNumber: 13
  title: The Wedding of River Song
 ? The.Simpsons.S24E03.Adventures.in.Baby-Getting.720p.WEB-DL.DD5.1.H.264-CtrlHD.mkv
 : series: The Simpsons
  season: 24
  episodeNumber: 3
  title: Adventures in Baby-Getting
  screenSize: 720p
  format: WEB-DL
  audioChannels: "5.1"
  audioCodec: DolbyDigital
  videoCodec: h264
  releaseGroup: CtrlHD
 ? /home/disaster/Videos/TV/Merlin/merlin_2008.5x02.arthurs_bane_part_two.repack.720p_hdtv_x264-fov.mkv
 : series: Merlin
  season: 5
  episodeNumber: 2
  title: Arthurs bane part two
  screenSize: 720p
  format: HDTV
  videoCodec: h264
  releaseGroup: Fov
  year: 2008
  other: Proper
 ? "Da Vinci's Demons - 1x04 - The Magician.mkv"
 : series: "Da Vinci's Demons"
  season: 1
  episodeNumber: 4
  title: The Magician
 ? CSI.S013E18.Sheltered.720p.WEB-DL.DD5.1.H.264.mkv
 : series: CSI
  season: 13
  episodeNumber: 18
  title: Sheltered
  screenSize: 720p
  format: WEB-DL
  audioChannels: "5.1"
  audioCodec: DolbyDigital
  videoCodec: h264
 ? Game of Thrones S03E06 1080i HDTV DD5.1 MPEG2-TrollHD.ts
 : series: Game of Thrones
  season: 3
  episodeNumber: 6
  screenSize: 1080i
  format: HDTV
  audioChannels: "5.1"
  audioCodec: DolbyDigital
  videoCodec: MPEG2
  releaseGroup: TrollHD
 ? gossip.girl.s01e18.hdtv.xvid-2hd.eng.srt
 : series: gossip girl
  season: 1
  episodeNumber: 18
  format: HDTV
  videoCodec: XviD
  releaseGroup: 2HD
  subtitleLanguage: english
 ? Wheels.S03E01E02.720p.HDTV.x264-IMMERSE.mkv
 : series: Wheels
  season: 3
  episodeNumber: 1
  episodeList: [1, 2]
  screenSize: 720p
  format: HDTV
  videoCodec: h264
  releaseGroup: IMMERSE
 ? Wheels.S03E01-02.720p.HDTV.x264-IMMERSE.mkv
 : series: Wheels
  season: 3
  episodeNumber: 1
  episodeList: [1, 2]
  screenSize: 720p
  format: HDTV
  videoCodec: h264
  releaseGroup: IMMERSE
 ? Wheels.S03E01-E02.720p.HDTV.x264-IMMERSE.mkv
 : series: Wheels
  season: 3
  episodeNumber: 1
  episodeList: [1, 2]
  screenSize: 720p
  format: HDTV
  videoCodec: h264
  releaseGroup: IMMERSE
 ? Wheels.S03E01-03.720p.HDTV.x264-IMMERSE.mkv
 : series: Wheels
  season: 3
  episodeNumber: 1
  episodeList: [1, 2, 3]
  screenSize: 720p
  format: HDTV
  videoCodec: h264
  releaseGroup: IMMERSE
 ? Marvels.Agents.of.S.H.I.E.L.D.S01E06.720p.HDTV.X264-DIMENSION.mkv
 : series: Marvels Agents of S.H.I.E.L.D.
  season: 1
  episodeNumber: 6
  screenSize: 720p
  format: HDTV
  videoCodec: h264
  releaseGroup: DIMENSION
 ? Marvels.Agents.of.S.H.I.E.L.D..S01E06.720p.HDTV.X264-DIMENSION.mkv
 : series: Marvels Agents of S.H.I.E.L.D.
  season: 1
  episodeNumber: 6
  screenSize: 720p
  format: HDTV
  videoCodec: h264
  releaseGroup: DIMENSION
 ? Series/Friday Night Lights/Season 1/Friday Night Lights S01E19 - Ch-Ch-Ch-Ch-Changes.avi
 : series: Friday Night Lights
  season: 1
  episodeNumber: 19
  title: Ch-Ch-Ch-Ch-Changes
 ? Dexter Saison VII FRENCH.BDRip.XviD-MiND.nfo
 : series: Dexter
  season: 7
  videoCodec: XviD
  language: French
  format: BluRay
  releaseGroup: MiND
 ? Dexter Saison sept FRENCH.BDRip.XviD-MiND.nfo
 : series: Dexter
  season: 7
  videoCodec: XviD
  language: French
  format: BluRay
  releaseGroup: MiND
 ? "Pokémon S16 - E29 - 1280*720 HDTV VF.mkv"
 : series: Pokémon
  format: HDTV
  language: French
  season: 16
  episodeNumber: 29
  screenSize: 720p
 ? One.Piece.E576.VOSTFR.720p.HDTV.x264-MARINE-FORD.mkv
 : episodeNumber: 576
  videoCodec: h264
  format: HDTV
  series: One Piece
  releaseGroup: MARINE-FORD
  subtitleLanguage: French
  screenSize: 720p
 ? Dexter.S08E12.FINAL.MULTi.1080p.BluRay.x264-MiND.mkv
 : videoCodec: h264
  episodeNumber: 12
  season: 8
  format: BluRay
  series: Dexter
  other: final
  language: Multiple languages
  releaseGroup: MiND
  screenSize: 1080p
 ? One Piece - E623 VOSTFR HD [www.manga-ddl-free.com].mkv
 : website: www.manga-ddl-free.com
  episodeNumber: 623
  subtitleLanguage: French
  series: One Piece
  other: HD
 ? Falling Skies Saison 1.HDLight.720p.x264.VFF.mkv
 : language: French
  screenSize: 720p
  season: 1
  series: Falling Skies
  videoCodec: h264
 ? Sleepy.Hollow.S01E09.720p.WEB-DL.DD5.1.H.264-BP.mkv
 : episodeNumber: 9
  videoCodec: h264
  format: WEB-DL
  series: Sleepy Hollow
  audioChannels: "5.1"
  screenSize: 720p
  season: 1
  videoProfile: BP
  audioCodec: DolbyDigital
 ? Sleepy.Hollow.S01E09.720p.WEB-DL.DD5.1.H.264-BS.mkv
 : episodeNumber: 9
  videoCodec: h264
  format: WEB-DL
  series: Sleepy Hollow
  audioChannels: "5.1"
  screenSize: 720p
  season: 1
  releaseGroup: BS
  audioCodec: DolbyDigital
 ? Battlestar.Galactica.S00.Pilot.FRENCH.DVDRip.XviD-NOTAG.avi
 : series: Battlestar Galactica
  season: 0
  title: Pilot
  special: Pilot
  language: French
  format: DVD
  videoCodec: XviD
  releaseGroup: NOTAG
 ? The Big Bang Theory S00E00 Unaired Pilot VOSTFR TVRip XviD-VioCs
 : options: -n
  series: The Big Bang Theory
  season: 0
  episodeNumber: 0
  subtitleLanguage: French
  format: TV
  videoCodec: XviD
  releaseGroup: VioCs
  special: [Unaired, Pilot]
  title: Unaired Pilot
 ? The Big Bang Theory S01E00 PROPER Unaired Pilot TVRip XviD-GIGGITY
 : options: -n
  series: The Big Bang Theory
  season: 1
  episodeNumber: 0
  format: TV
  videoCodec: XviD
  releaseGroup: GIGGITY
  other: proper
  special: [Unaired, Pilot]
  title: Unaired Pilot
--- a/lib/guessit/test/guessittest.py
+++ b/lib/guessit/test/guessittest.py
@ -1,168 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
 # Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
 # GuessIt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # Lesser GNU General Public License for more details.
 #
 # You should have received a copy of the Lesser GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 from __future__ import absolute_import, division, print_function, unicode_literals
 from guessit import base_text_type, u
 from unittest import TestCase, TestLoader, TextTestRunner
 import shlex
 import yaml, logging, sys, os
 from os.path import *
 def currentPath():
    '''Return the directory of the file from which this function is called.

    The caller's ``__file__`` global is joined onto the current working
    directory before its directory part is taken.
    '''
    # frame 1 is the immediate caller of this function
    callerGlobals = sys._getframe(1).f_globals
    callerFile = join(os.getcwd(), callerGlobals['__file__'])
    return dirname(callerFile)
 def addImportPath(path):
    '''Prepend the specified path to the import path (``sys.path``).

    The path can be absolute or relative to the calling file.
    '''
    # Inspect the frame of *our* caller directly. Going through a helper that
    # itself looks one frame up would see this function's frame and resolve
    # the path relative to this module instead of the calling file.
    callerFile = sys._getframe(1).f_globals['__file__']
    callerDir = dirname(join(os.getcwd(), callerFile))
    importPath = abspath(join(callerDir, path))
    # mutate in place so existing references to the sys.path list stay valid
    sys.path.insert(0, importPath)
 # Module-level logger, named after this module.
 log = logging.getLogger(__name__)
 from guessit.plugins import transformers
 import guessit
 from guessit.options import option_parser
 from guessit import *
 from guessit.matcher import *
 from guessit.fileutils import *
 def allTests(testClass):
    '''Return a test suite with all the tests loaded from the given TestCase class.'''
    loader = TestLoader()
    suite = loader.loadTestsFromTestCase(testClass)
    return suite
 class TestGuessit(TestCase):
    '''Base test case that compares guessed file information against the
    expected properties listed in a ground-truth mapping.'''
    def checkMinimumFieldsCorrect(self, filename, filetype=None, remove_type=True,
                                  exclude_files=None):
        '''Check every entry of a YAML ground-truth file with guess_file_info().

        The file is read through load_file_in_same_dir(__file__, filename) and
        each entry is guessed with ``type=filetype``. ``remove_type`` and
        ``exclude_files`` are forwarded unchanged to checkFields().
        '''
        # NOTE(review): yaml.load() without an explicit Loader can construct
        # arbitrary Python objects, and recent PyYAML releases require the
        # Loader argument. The fixtures are presumably trusted project files;
        # consider yaml.safe_load() once it is confirmed that no fixture
        # relies on custom tags.
        groundTruth = yaml.load(load_file_in_same_dir(__file__, filename))
        def guess_func(string, options=None):
            return guess_file_info(string, options=options, type=filetype)
        return self.checkFields(groundTruth, guess_func, remove_type, exclude_files)
    def checkFields(self, groundTruth, guess_func, remove_type=True,
                    exclude_files=None):
        '''Check the results of guess_func() against the expected properties.

        ``groundTruth`` maps each filename to a mapping of required
        properties. An optional 'options' entry holds an option string; it is
        removed from the entry, parsed with option_parser and handed to
        ``guess_func(filename, options)`` as a dict.

        Every required property must be present in the guess and equal to the
        expected value; two strings, or two lists (assumed to be lists of
        strings), are compared case-insensitively. Properties guessed on top
        of the required ones are only reported as warnings.

        Filenames listed in ``exclude_files`` are skipped. When
        ``remove_type`` is true the guessed 'type' is ignored. The test fails
        unless every checked filename was guessed correctly.
        '''
        total = 0
        exclude_files = exclude_files or []
        fails = {}
        additionals = {}
        # no need for these in the unittests
        ignored_props = ('container', 'mimetype')
        if remove_type:
            ignored_props = ('type',) + ignored_props
        for filename, required_fields in groundTruth.items():
            filename = u(filename)
            if filename in exclude_files:
                continue
            log.debug('\n' + '-' * 120)
            log.info('Guessing information for file: %s' % filename)
            # 'options' is test configuration, not an expected property
            options = required_fields.pop('options', None)
            if options:
                args = shlex.split(options)
                options, _ = option_parser.parse_args(args)
                options = vars(options)
            found = guess_func(filename, options)
            total = total + 1
            # explicit membership test instead of a bare except around del,
            # so unrelated errors are no longer silently swallowed
            for prop in ignored_props:
                if prop in found:
                    del found[prop]
            # props which are list of just 1 elem should be opened for easier writing of the tests
            for prop in ('language', 'subtitleLanguage', 'other', 'special'):
                value = found.get(prop, None)
                if isinstance(value, list) and len(value) == 1:
                    found[prop] = value[0]
            # look for missing properties
            for prop, value in required_fields.items():
                if prop not in found:
                    log.debug("Prop '%s' not found in: %s" % (prop, filename))
                    fails.setdefault(filename, []).append("'%s' not found in: %s" % (prop, filename))
                    continue
                guessed = found[prop]
                # if both properties are strings, do a case-insensitive comparison
                if (isinstance(value, base_text_type) and
                    isinstance(guessed, base_text_type)):
                    if value.lower() != guessed.lower():
                        log.debug("Wrong prop value [str] for '%s': expected = '%s' - received = '%s'" % (prop, u(value), u(guessed)))
                        fails.setdefault(filename, []).append("'%s': expected = '%s' - received = '%s'" % (prop, u(value), u(guessed)))
                # if both are lists, we assume list of strings and do a case-insensitive
                # comparison on their elements
                elif isinstance(value, list) and isinstance(guessed, list):
                    s1 = set(u(s).lower() for s in value)
                    s2 = set(u(s).lower() for s in guessed)
                    if s1 != s2:
                        log.debug("Wrong prop value [list] for '%s': expected = '%s' - received = '%s'" % (prop, u(value), u(guessed)))
                        fails.setdefault(filename, []).append("'%s': expected = '%s' - received = '%s'" % (prop, u(value), u(guessed)))
                # otherwise, just compare their values directly
                else:
                    if guessed != value:
                        log.debug("Wrong prop value for '%s': expected = '%s' [%s] - received = '%s' [%s]" % (prop, u(value), type(value), u(guessed), type(guessed)))
                        fails.setdefault(filename, []).append("'%s': expected = '%s' [%s] - received = '%s' [%s]" % (prop, u(value), type(value), u(guessed), type(guessed)))
            # look for additional properties
            for prop, value in found.items():
                if prop not in required_fields:
                    log.debug("Found additional info for prop = '%s': '%s'" % (prop, u(value)))
                    additionals.setdefault(filename, []).append("'%s': '%s'" % (prop, u(value)))
        # a filename counts as failed as soon as one of its properties is wrong
        correct = total - len(fails)
        log.info('SUMMARY: Guessed correctly %d out of %d filenames' % (correct, total))
        for failed_entry, failed_properties in fails.items():
            log.error('---- ' + failed_entry + ' ----')
            for failed_property in failed_properties:
                log.error("FAILED: " + failed_property)
        # Logger.warn() is a deprecated alias; use warning()
        for additional_entry, additional_properties in additionals.items():
            log.warning('---- ' + additional_entry + ' ----')
            for additional_property in additional_properties:
                log.warning("ADDITIONAL: " + additional_property)
        self.assertTrue(correct == total,
                        msg='Correct: %d < Total: %d' % (correct, total))
--- a/lib/guessit/test/movies.yaml
+++ b/lib/guessit/test/movies.yaml
@ -1,626 +0,0 @@
 ? Movies/Fear and Loathing in Las Vegas (1998)/Fear.and.Loathing.in.Las.Vegas.720p.HDDVD.DTS.x264-ESiR.mkv
 : title: Fear and Loathing in Las Vegas
  year: 1998
  screenSize: 720p
  format: HD-DVD
  audioCodec: DTS
  videoCodec: h264
  releaseGroup: ESiR
 ? Movies/El Dia de la Bestia (1995)/El.dia.de.la.bestia.DVDrip.Spanish.DivX.by.Artik[SEDG].avi
 : title: El Dia de la Bestia
  year: 1995
  format: DVD
  language: spanish
  videoCodec: DivX
 ? Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv
 : title: Dark City
  year: 1998
  format: BluRay
  screenSize: 720p
  audioCodec: DTS
  videoCodec: h264
  releaseGroup: CHD
 ? Movies/Sin City (BluRay) (2005)/Sin.City.2005.BDRip.720p.x264.AC3-SEPTiC.mkv
 : title: Sin City
  year: 2005
  format: BluRay
  screenSize: 720p
  videoCodec: h264
  audioCodec: AC3
  releaseGroup: SEPTiC
 ? Movies/Borat (2006)/Borat.(2006).R5.PROPER.REPACK.DVDRip.XviD-PUKKA.avi
 : title: Borat
  year: 2006
  other: PROPER
  format: DVD
  other: [ R5, Proper ]
  videoCodec: XviD
  releaseGroup: PUKKA
 ? "[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv"
 : title: Le Prestige
  format: DVD
  videoCodec: h264
  videoProfile: HP
  audioCodec: AAC
  audioProfile: HE
  language: [ french, english ]
  subtitleLanguage: [ french, english ]
 ? Battle Royale (2000)/Battle.Royale.(Batoru.Rowaiaru).(2000).(Special.Edition).CD1of2.DVDRiP.XviD-[ZeaL].avi
 : title: Battle Royale
  year: 2000
  edition: special edition
  cdNumber: 1
  cdNumberTotal: 2
  format: DVD
  videoCodec: XviD
  releaseGroup: ZeaL
 ? Movies/Brazil (1985)/Brazil_Criterion_Edition_(1985).CD2.avi
 : title: Brazil
  edition: Criterion Edition
  year: 1985
  cdNumber: 2
 ? Movies/Persepolis (2007)/[XCT] Persepolis [H264+Aac-128(Fr-Eng)+ST(Fr-Eng)+Ind].mkv
 : title: Persepolis
  year: 2007
  videoCodec: h264
  audioCodec: AAC
  language: [ French, English ]
  subtitleLanguage: [ French, English ]
 ? Movies/Toy Story (1995)/Toy Story [HDTV 720p English-Spanish].mkv
 : title: Toy Story
  year: 1995
  format: HDTV
  screenSize: 720p
  language: [ english, spanish ]
 ? Movies/Office Space (1999)/Office.Space.[Dual-DVDRip].[Spanish-English].[XviD-AC3-AC3].[by.Oswald].avi
 : title: Office Space
  year: 1999
  format: DVD
  language: [ english, spanish ]
  videoCodec: XviD
  audioCodec: AC3
 ? Movies/Wild Zero (2000)/Wild.Zero.DVDivX-EPiC.avi
 : title: Wild Zero
  year: 2000
  videoCodec: DivX
  releaseGroup: EPiC
 ? movies/Baraka_Edition_Collector.avi
 : title: Baraka
  edition: collector edition
 ? Movies/Blade Runner (1982)/Blade.Runner.(1982).(Director's.Cut).CD1.DVDRip.XviD.AC3-WAF.avi
 : title: Blade Runner
  year: 1982
  edition: Director's Cut
  cdNumber: 1
  format: DVD
  videoCodec: XviD
  audioCodec: AC3
  releaseGroup: WAF
 ? movies/American.The.Bill.Hicks.Story.2009.DVDRip.XviD-EPiSODE.[UsaBit.com]/UsaBit.com_esd-americanbh.avi
 : title: American The Bill Hicks Story
  year: 2009
  format: DVD
  videoCodec: XviD
  releaseGroup: EPiSODE
  website: UsaBit.com
 ? movies/Charlie.And.Boots.DVDRip.XviD-TheWretched/wthd-cab.avi
 : title: Charlie And Boots
  format: DVD
  videoCodec: XviD
  releaseGroup: TheWretched
 ? movies/Steig Larsson Millenium Trilogy (2009) BRrip 720 AAC x264/(1)The Girl With The Dragon Tattoo (2009) BRrip 720 AAC x264.mkv
 : title: The Girl With The Dragon Tattoo
  filmSeries: Steig Larsson Millenium Trilogy
  filmNumber: 1
  year: 2009
  format: BluRay
  audioCodec: AAC
  videoCodec: h264
  screenSize: 720p
 ? movies/Greenberg.REPACK.LiMiTED.DVDRip.XviD-ARROW/arw-repack-greenberg.dvdrip.xvid.avi
 : title: Greenberg
  format: DVD
  videoCodec: XviD
  releaseGroup: ARROW
  other: ['Proper', 'Limited']
 ? Movies/Fr - Paris 2054, Renaissance (2005) - De Christian Volckman - (Film Divx Science Fiction Fantastique Thriller Policier N&B).avi
 : title: Paris 2054, Renaissance
  year: 2005
  language: french
  videoCodec: DivX
 ? Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi
 : title: Avida
  year: 2006
  language: french
  format: DVD
  videoCodec: XviD
  releaseGroup: PROD
 ? Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi
 : title: Alice in Wonderland
  format: DVD
  videoCodec: XviD
  releaseGroup: DiAMOND
 ? Movies/Ne.Le.Dis.A.Personne.Fr 2 cd/personnea_mp.avi
 : title: Ne Le Dis A Personne
  language: french
  cdNumberTotal: 2
 ? Movies/Bunker Palace Hôtel (Enki Bilal) (1989)/Enki Bilal - Bunker Palace Hotel (Fr Vhs Rip).avi
 : title: Bunker Palace Hôtel
  year: 1989
  language: french
  format: VHS
 ? Movies/21 (2008)/21.(2008).DVDRip.x264.AC3-FtS.[sharethefiles.com].mkv
 : title: "21"
  year: 2008
  format: DVD
  videoCodec: h264
  audioCodec: AC3
  releaseGroup: FtS
  website: sharethefiles.com
 ? Movies/9 (2009)/9.2009.Blu-ray.DTS.720p.x264.HDBRiSe.[sharethefiles.com].mkv
 : title: "9"
  year: 2009
  format: BluRay
  audioCodec: DTS
  screenSize: 720p
  videoCodec: h264
  releaseGroup: HDBRiSe
  website: sharethefiles.com
 ? Movies/Mamma.Mia.2008.DVDRip.AC3.XviD-CrazyTeam/Mamma.Mia.2008.DVDRip.AC3.XviD-CrazyTeam.avi
 : title: Mamma Mia
  year: 2008
  format: DVD
  audioCodec: AC3
  videoCodec: XviD
  releaseGroup: CrazyTeam
 ? Movies/M.A.S.H. (1970)/MASH.(1970).[Divx.5.02][Dual-Subtitulos][DVDRip].ogm
 : title: M.A.S.H.
  year: 1970
  videoCodec: DivX
  format: DVD
 ? Movies/The Doors (1991)/09.03.08.The.Doors.(1991).BDRip.720p.AC3.X264-HiS@SiLUHD-English.[sharethefiles.com].mkv
 : title: The Doors
  year: 1991
  date: 2008-03-09
  format: BluRay
  screenSize: 720p
  audioCodec: AC3
  videoCodec: h264
  releaseGroup: HiS@SiLUHD
  language: english
  website: sharethefiles.com
 ? Movies/Ratatouille/video_ts-ratatouille.srt
 : title: Ratatouille
  format: DVD
 ? Movies/001 __ A classer/Fantomas se déchaine - Louis de Funès.avi
 : title: Fantomas se déchaine
 ? Movies/Comme une Image (2004)/Comme.Une.Image.FRENCH.DVDRiP.XViD-NTK.par-www.divx-overnet.com.avi
 : title: Comme une Image
  year: 2004
  language: french
  format: DVD
  videoCodec: XviD
  releaseGroup: NTK
  website: www.divx-overnet.com
 ? Movies/Fantastic Mr Fox/Fantastic.Mr.Fox.2009.DVDRip.{x264+LC-AAC.5.1}{Fr-Eng}{Sub.Fr-Eng}-™.[sharethefiles.com].mkv
 : title: Fantastic Mr Fox
  year: 2009
  format: DVD
  videoCodec: h264
  audioCodec: AAC
  audioProfile: LC
  audioChannels: "5.1"
  language: [ french, english ]
  subtitleLanguage: [ french, english ]
  website: sharethefiles.com
 ? Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi
 : title: Somewhere
  year: 2010
  format: DVD
  videoCodec: XviD
  releaseGroup: iLG
 ? Movies/Moon_(2009).mkv
 : title: Moon
  year: 2009
 ? Movies/Moon_(2009)-x01.mkv
 : title: Moon
  year: 2009
  bonusNumber: 1
 ? Movies/Moon_(2009)-x02-Making_Of.mkv
 : title: Moon
  year: 2009
  bonusNumber: 2
  bonusTitle: Making Of
 ? movies/James_Bond-f17-Goldeneye.mkv
 : title: Goldeneye
  filmSeries: James Bond
  filmNumber: 17
 ? /movies/James_Bond-f21-Casino_Royale.mkv
 : title: Casino Royale
  filmSeries: James Bond
  filmNumber: 21
 ? /movies/James_Bond-f21-Casino_Royale-x01-Becoming_Bond.mkv
 : title: Casino Royale
  filmSeries: James Bond
  filmNumber: 21
  bonusNumber: 1
  bonusTitle: Becoming Bond
 ? /movies/James_Bond-f21-Casino_Royale-x02-Stunts.mkv
 : title: Casino Royale
  filmSeries: James Bond
  filmNumber: 21
  bonusNumber: 2
  bonusTitle: Stunts
 ? OSS_117--Cairo,_Nest_of_Spies.mkv
 : title: OSS 117--Cairo, Nest of Spies
 ? The Godfather Part III.mkv
 : title: The Godfather Part III
 ? Foobar Part VI.mkv
 : title: Foobar Part VI
 ? The_Insider-(1999)-x02-60_Minutes_Interview-1996.mp4
 : title: The Insider
  year: 1999
  bonusNumber: 2
  bonusTitle: 60 Minutes Interview-1996
 ? Rush.._Beyond_The_Lighted_Stage-x09-Between_Sun_and_Moon-2002_Hartford.mkv
 : title: Rush Beyond The Lighted Stage
  bonusNumber: 9
  bonusTitle: Between Sun and Moon-2002 Hartford
 ? /public/uTorrent/Downloads Finished/Movies/Indiana.Jones.and.the.Temple.of.Doom.1984.HDTV.720p.x264.AC3.5.1-REDµX/Indiana.Jones.and.the.Temple.of.Doom.1984.HDTV.720p.x264.AC3.5.1-REDµX.mkv
 : title: Indiana Jones and the Temple of Doom
  year: 1984
  format: HDTV
  screenSize: 720p
  videoCodec: h264
  audioCodec: AC3
  audioChannels: "5.1"
  releaseGroup: REDµX
 ? The.Director’s.Notebook.2006.Blu-Ray.x264.DXVA.720p.AC3-de[42].mkv
 : title: The Director’s Notebook
  year: 2006
  format: BluRay
  videoCodec: h264
  videoApi: DXVA
  screenSize: 720p
  audioCodec: AC3
  releaseGroup: de[42]
 ? Movies/Cosmopolis.2012.LiMiTED.720p.BluRay.x264-AN0NYM0US[bb]/ano-cosmo.720p.mkv
 : title: Cosmopolis
  year: 2012
  screenSize: 720p
  videoCodec: h264
  releaseGroup: AN0NYM0US[bb]
  format: BluRay
  other: LIMITED
 ? movies/La Science des Rêves (2006)/La.Science.Des.Reves.FRENCH.DVDRip.XviD-MP-AceBot.avi
 : title: La Science des Rêves
  year: 2006
  format: DVD
  videoCodec: XviD
  videoProfile: MP
  releaseGroup: AceBot
  language: French
 ? The_Italian_Job.mkv
 : title: The Italian Job
 ? The.Rum.Diary.2011.1080p.BluRay.DTS.x264.D-Z0N3.mkv
 : title: The Rum Diary
  year: 2011
  screenSize: 1080p
  format: BluRay
  videoCodec: h264
  audioCodec: DTS
  releaseGroup: D-Z0N3
 ? Life.Of.Pi.2012.1080p.BluRay.DTS.x264.D-Z0N3.mkv
 : title: Life Of Pi
  year: 2012
  screenSize: 1080p
  format: BluRay
  videoCodec: h264
  audioCodec: DTS
  releaseGroup: D-Z0N3
 ? The.Kings.Speech.2010.1080p.BluRay.DTS.x264.D Z0N3.mkv
 : title: The Kings Speech
  year: 2010
  screenSize: 1080p
  format: BluRay
  audioCodec: DTS
  videoCodec: h264
  releaseGroup: D-Z0N3
 ? Street.Kings.2008.BluRay.1080p.DTS.x264.dxva EuReKA.mkv
 : title: Street Kings
  year: 2008
  format: BluRay
  screenSize: 1080p
  audioCodec: DTS
  videoCodec: h264
  videoApi: DXVA
  releaseGroup: EuReKa
 ? 2001.A.Space.Odyssey.1968.HDDVD.1080p.DTS.x264.dxva EuReKA.mkv
 : title: 2001 A Space Odyssey
  year: 1968
  format: HD-DVD
  screenSize: 1080p
  audioCodec: DTS
  videoCodec: h264
  videoApi: DXVA
  releaseGroup: EuReKa
 ? 2012.2009.720p.BluRay.x264.DTS WiKi.mkv
 : title: "2012"
  year: 2009
  screenSize: 720p
  format: BluRay
  videoCodec: h264
  audioCodec: DTS
  releaseGroup: WiKi
 ? /share/Download/movie/Dead Man Down (2013) BRRiP XViD DD5_1 Custom NLSubs =-_lt Q_o_Q gt-=_/XD607ebb-BRc59935-5155473f-1c5f49/XD607ebb-BRc59935-5155473f-1c5f49.avi
 : title: Dead Man Down
  year: 2013
  format: BluRay
  videoCodec: XviD
  audioChannels: "5.1"
  audioCodec: DolbyDigital
  idNumber: XD607ebb-BRc59935-5155473f-1c5f49
 ? Pacific.Rim.3D.2013.COMPLETE.BLURAY-PCH.avi
 : title: Pacific Rim
  year: 2013
  format: BluRay
  other:
   - complete
   - 3D
  releaseGroup: PCH
 ? Immersion.French.2011.STV.READNFO.QC.FRENCH.ENGLISH.NTSC.DVDR.nfo
 : title: Immersion French
  year: 2011
  language:
    - French
    - English
 ? Immersion.French.2011.STV.READNFO.QC.FRENCH.NTSC.DVDR.nfo
 : title: Immersion French
  year: 2011
  language: French
 ? Immersion.French.2011.STV.READNFO.QC.NTSC.DVDR.nfo
 : title: Immersion French
  year: 2011
 ? French.Immersion.2011.STV.READNFO.QC.ENGLISH.NTSC.DVDR.nfo
 : title: French Immersion
  year: 2011
  language: ENGLISH
 ? Howl's_Moving_Castle_(2004)_[720p,HDTV,x264,DTS]-FlexGet.avi
 : videoCodec: h264
  format: HDTV
  title: Howl's Moving Castle
  screenSize: 720p
  year: 2004
  audioCodec: DTS
  releaseGroup: FlexGet
 ? Pirates de langkasuka.2008.FRENCH.1920X1080.h264.AVC.AsiaRa.mkv
 : screenSize: 1080p
  year: 2008
  language: French
  videoCodec: h264
  title: Pirates de langkasuka
  releaseGroup: AsiaRa
 ? Masala (2013) Telugu Movie HD DVDScr XviD - Exclusive.avi
 : year: 2013
  videoCodec: XviD
  title: Masala
  format: HD-DVD
  other: screener
  language: Telugu
  releaseGroup: Exclusive
 ? Django Unchained 2012 DVDSCR X264 AAC-P2P.nfo
 : year: 2012
  other: screener
  videoCodec: h264
  title: Django Unchained
  audioCodec: AAC
  format: DVD
  releaseGroup: P2P
 ? Ejecutiva.En.Apuros(2009).BLURAY.SCR.Xvid.Spanish.LanzamientosD.nfo
 : year: 2009
  other: screener
  format: BluRay
  videoCodec: XviD
  language: Spanish
  title: Ejecutiva En Apuros
 ? Die.Schluempfe.2.German.DL.1080p.BluRay.x264-EXQUiSiTE.mkv
 : title: Die Schluempfe 2
  format: BluRay
  language:
    - Multiple languages
    - German
  videoCodec: h264
  releaseGroup: EXQUiSiTE
  screenSize: 1080p
 ? Rocky 1976 French SubForced BRRip x264 AC3-FUNKY.mkv
 : title: Rocky
  year: 1976
  subtitleLanguage: French
  format: BluRay
  videoCodec: h264
  audioCodec: AC3
  releaseGroup: FUNKY
 ? REDLINE (BD 1080p H264 10bit FLAC) [3xR].mkv
 : title: REDLINE
  format: BluRay
  videoCodec: h264
  videoProfile: 10bit
  audioCodec: Flac
  screenSize: 1080p
 ? The.Lizzie.McGuire.Movie.(2003).HR.DVDRiP.avi
 : title: The Lizzie McGuire Movie
  year: 2003
  screenSize: 480p
  format: DVD
 ? Hua.Mulan.BRRIP.MP4.x264.720p-HR.avi
 : title: Hua Mulan
  videoCodec: h264
  format: BluRay
  screenSize: 720p
 ? Dr.Seuss.The.Lorax.2012.DVDRip.LiNE.XviD.AC3.HQ.Hive-CM8.mp4
 : videoCodec: XviD
  title: Dr Seuss The Lorax
  format: DVD
  other: LiNE
  year: 2012
  audioCodec: AC3
  audioProfile: HQ
  releaseGroup: Hive-CM8
 ? "Star Wars: Episode IV - A New Hope (2004) Special Edition.MKV"
 : title: Star Wars Episode IV
  year: 2004
  edition: Special Edition
 ? Dr.LiNE.The.Lorax.2012.DVDRip.LiNE.XviD.AC3.HQ.Hive-CM8.mp4
 : videoCodec: XviD
  title: Dr LiNE The Lorax
  format: DVD
  other: LiNE
  year: 2012
  audioCodec: AC3
  audioProfile: HQ
  releaseGroup: Hive-CM8
 ? Perfect Child-2007-TRUEFRENCH-TVRip.Xvid-h@mster.avi
 : releaseGroup: h@mster
  title: Perfect Child
  videoCodec: XviD
  language: French
  format: TV
  year: 2007
 ? entre.ciel.et.terre.(1994).dvdrip.h264.aac-psypeon.avi
 : audioCodec: AAC
  format: DVD
  releaseGroup: psypeon
  title: entre ciel et terre
  videoCodec: h264
  year: 1994
 ? Yves.Saint.Laurent.2013.FRENCH.DVDSCR.MD.XviD-ViVARiUM.avi
 : format: DVD
  language: French
  other: Screener
  releaseGroup: ViVARiUM
  title: Yves Saint Laurent
  videoCodec: XviD
  year: 2013
 ? Echec et Mort - Hard to Kill - Steven Seagal Multi 1080p BluRay x264 CCATS.avi
 : format: BluRay
  language: Multiple languages
  releaseGroup: CCATS
  screenSize: 1080p
  title: Echec et Mort
  videoCodec: h264
 ? Paparazzi - Timsit/Lindon (MKV 1080p tvripHD)
 : options: -n
  title: Paparazzi
  screenSize: 1080p
  format: HDTV
 ? some.movie.720p.bluray.x264-mind
 : options: -n
  title: some movie
  screenSize: 720p
  videoCodec: h264
  releaseGroup: mind
  format: BluRay
 ? Dr LiNE The Lorax 720p h264 BluRay
 : options: -n
  title: Dr LiNE The Lorax
  screenSize: 720p
  videoCodec: h264
  format: BluRay
 ? BeatdownFrenchDVDRip.mkv
 : title: Beatdown
  language: French
  format: DVD
 ? YvesSaintLaurent2013FrenchDVDScrXvid.avi
 : format: DVD
  language: French
  other: Screener
  title: Yves saint laurent
  videoCodec: XviD
  year: 2013
--- a/lib/guessit/test/opensubtitles_languages_2012_05_09.txt
+++ b/lib/guessit/test/opensubtitles_languages_2012_05_09.txt
@ -1,473 +0,0 @@
 IdSubLanguage	ISO639	LanguageName	UploadEnabled	WebEnabled
 aar	aa	Afar, afar	0	0
 abk	ab	Abkhazian	0	0
 ace		Achinese	0	0
 ach		Acoli	0	0
 ada		Adangme	0	0
 ady		adyghé	0	0
 afa		Afro-Asiatic (Other)	0	0
 afh		Afrihili	0	0
 afr	af	Afrikaans	0	0
 ain		Ainu	0	0
 aka	ak	Akan	0	0
 akk		Akkadian	0	0
 alb	sq	Albanian	1	1
 ale		Aleut	0	0
 alg		Algonquian languages	0	0
 alt		Southern Altai	0	0
 amh	am	Amharic	0	0
 ang		English, Old (ca.450-1100)	0	0
 apa		Apache languages	0	0
 ara	ar	Arabic	1	1
 arc		Aramaic	0	0
 arg	an	Aragonese	0	0
 arm	hy	Armenian	1	0
 arn		Araucanian	0	0
 arp		Arapaho	0	0
 art		Artificial (Other)	0	0
 arw		Arawak	0	0
 asm	as	Assamese	0	0
 ast		Asturian, Bable	0	0
 ath		Athapascan languages	0	0
 aus		Australian languages	0	0
 ava	av	Avaric	0	0
 ave	ae	Avestan	0	0
 awa		Awadhi	0	0
 aym	ay	Aymara	0	0
 aze	az	Azerbaijani	0	0
 bad		Banda	0	0
 bai		Bamileke languages	0	0
 bak	ba	Bashkir	0	0
 bal		Baluchi	0	0
 bam	bm	Bambara	0	0
 ban		Balinese	0	0
 baq	eu	Basque	1	1
 bas		Basa	0	0
 bat		Baltic (Other)	0	0
 bej		Beja	0	0
 bel	be	Belarusian	0	0
 bem		Bemba	0	0
 ben	bn	Bengali	1	0
 ber		Berber (Other)	0	0
 bho		Bhojpuri	0	0
 bih	bh	Bihari	0	0
 bik		Bikol	0	0
 bin		Bini	0	0
 bis	bi	Bislama	0	0
 bla		Siksika	0	0
 bnt		Bantu (Other)	0	0
 bos	bs	Bosnian	1	0
 bra		Braj	0	0
 bre	br	Breton	1	0
 btk		Batak (Indonesia)	0	0
 bua		Buriat	0	0
 bug		Buginese	0	0
 bul	bg	Bulgarian	1	1
 bur	my	Burmese	0	0
 byn		Blin	0	0
 cad		Caddo	0	0
 cai		Central American Indian (Other)	0	0
 car		Carib	0	0
 cat	ca	Catalan	1	1
 cau		Caucasian (Other)	0	0
 ceb		Cebuano	0	0
 cel		Celtic (Other)	0	0
 cha	ch	Chamorro	0	0
 chb		Chibcha	0	0
 che	ce	Chechen	0	0
 chg		Chagatai	0	0
 chi	zh	Chinese	1	1
 chk		Chuukese	0	0
 chm		Mari	0	0
 chn		Chinook jargon	0	0
 cho		Choctaw	0	0
 chp		Chipewyan	0	0
 chr		Cherokee	0	0
 chu	cu	Church Slavic	0	0
 chv	cv	Chuvash	0	0
 chy		Cheyenne	0	0
 cmc		Chamic languages	0	0
 cop		Coptic	0	0
 cor	kw	Cornish	0	0
 cos	co	Corsican	0	0
 cpe		Creoles and pidgins, English based (Other)	0	0
 cpf		Creoles and pidgins, French-based (Other)	0	0
 cpp		Creoles and pidgins, Portuguese-based (Other)	0	0
 cre	cr	Cree	0	0
 crh		Crimean Tatar	0	0
 crp		Creoles and pidgins (Other)	0	0
 csb		Kashubian	0	0
 cus		Cushitic (Other)' couchitiques, autres langues	0	0
 cze	cs	Czech	1	1
 dak		Dakota	0	0
 dan	da	Danish	1	1
 dar		Dargwa	0	0
 day		Dayak	0	0
 del		Delaware	0	0
 den		Slave (Athapascan)	0	0
 dgr		Dogrib	0	0
 din		Dinka	0	0
 div	dv	Divehi	0	0
 doi		Dogri	0	0
 dra		Dravidian (Other)	0	0
 dua		Duala	0	0
 dum		Dutch, Middle (ca.1050-1350)	0	0
 dut	nl	Dutch	1	1
 dyu		Dyula	0	0
 dzo	dz	Dzongkha	0	0
 efi		Efik	0	0
 egy		Egyptian (Ancient)	0	0
 eka		Ekajuk	0	0
 elx		Elamite	0	0
 eng	en	English	1	1
 enm		English, Middle (1100-1500)	0	0
 epo	eo	Esperanto	1	0
 est	et	Estonian	1	1
 ewe	ee	Ewe	0	0
 ewo		Ewondo	0	0
 fan		Fang	0	0
 fao	fo	Faroese	0	0
 fat		Fanti	0	0
 fij	fj	Fijian	0	0
 fil		Filipino	0	0
 fin	fi	Finnish	1	1
 fiu		Finno-Ugrian (Other)	0	0
 fon		Fon	0	0
 fre	fr	French	1	1
 frm		French, Middle (ca.1400-1600)	0	0
 fro		French, Old (842-ca.1400)	0	0
 fry	fy	Frisian	0	0
 ful	ff	Fulah	0	0
 fur		Friulian	0	0
 gaa		Ga	0	0
 gay		Gayo	0	0
 gba		Gbaya	0	0
 gem		Germanic (Other)	0	0
 geo	ka	Georgian	1	1
 ger	de	German	1	1
 gez		Geez	0	0
 gil		Gilbertese	0	0
 gla	gd	Gaelic	0	0
 gle	ga	Irish	0	0
 glg	gl	Galician	1	1
 glv	gv	Manx	0	0
 gmh		German, Middle High (ca.1050-1500)	0	0
 goh		German, Old High (ca.750-1050)	0	0
 gon		Gondi	0	0
 gor		Gorontalo	0	0
 got		Gothic	0	0
 grb		Grebo	0	0
 grc		Greek, Ancient (to 1453)	0	0
 ell	el	Greek	1	1
 grn	gn	Guarani	0	0
 guj	gu	Gujarati	0	0
 gwi		Gwich´in	0	0
 hai		Haida	0	0
 hat	ht	Haitian	0	0
 hau	ha	Hausa	0	0
 haw		Hawaiian	0	0
 heb	he	Hebrew	1	1
 her	hz	Herero	0	0
 hil		Hiligaynon	0	0
 him		Himachali	0	0
 hin	hi	Hindi	1	1
 hit		Hittite	0	0
 hmn		Hmong	0	0
 hmo	ho	Hiri Motu	0	0
 hrv	hr	Croatian	1	1
 hun	hu	Hungarian	1	1
 hup		Hupa	0	0
 iba		Iban	0	0
 ibo	ig	Igbo	0	0
 ice	is	Icelandic	1	1
 ido	io	Ido	0	0
 iii	ii	Sichuan Yi	0	0
 ijo		Ijo	0	0
 iku	iu	Inuktitut	0	0
 ile	ie	Interlingue	0	0
 ilo		Iloko	0	0
 ina	ia	Interlingua (International Auxiliary Language Asso	0	0
 inc		Indic (Other)	0	0
 ind	id	Indonesian	1	1
 ine		Indo-European (Other)	0	0
 inh		Ingush	0	0
 ipk	ik	Inupiaq	0	0
 ira		Iranian (Other)	0	0
 iro		Iroquoian languages	0	0
 ita	it	Italian	1	1
 jav	jv	Javanese	0	0
 jpn	ja	Japanese	1	1
 jpr		Judeo-Persian	0	0
 jrb		Judeo-Arabic	0	0
 kaa		Kara-Kalpak	0	0
 kab		Kabyle	0	0
 kac		Kachin	0	0
 kal	kl	Kalaallisut	0	0
 kam		Kamba	0	0
 kan	kn	Kannada	0	0
 kar		Karen	0	0
 kas	ks	Kashmiri	0	0
 kau	kr	Kanuri	0	0
 kaw		Kawi	0	0
 kaz	kk	Kazakh	1	0
 kbd		Kabardian	0	0
 kha		Khasi	0	0
 khi		Khoisan (Other)	0	0
 khm	km	Khmer	1	1
 kho		Khotanese	0	0
 kik	ki	Kikuyu	0	0
 kin	rw	Kinyarwanda	0	0
 kir	ky	Kirghiz	0	0
 kmb		Kimbundu	0	0
 kok		Konkani	0	0
 kom	kv	Komi	0	0
 kon	kg	Kongo	0	0
 kor	ko	Korean	1	1
 kos		Kosraean	0	0
 kpe		Kpelle	0	0
 krc		Karachay-Balkar	0	0
 kro		Kru	0	0
 kru		Kurukh	0	0
 kua	kj	Kuanyama	0	0
 kum		Kumyk	0	0
 kur	ku	Kurdish	0	0
 kut		Kutenai	0	0
 lad		Ladino	0	0
 lah		Lahnda	0	0
 lam		Lamba	0	0
 lao	lo	Lao	0	0
 lat	la	Latin	0	0
 lav	lv	Latvian	1	0
 lez		Lezghian	0	0
 lim	li	Limburgan	0	0
 lin	ln	Lingala	0	0
 lit	lt	Lithuanian	1	0
 lol		Mongo	0	0
 loz		Lozi	0	0
 ltz	lb	Luxembourgish	1	0
 lua		Luba-Lulua	0	0
 lub	lu	Luba-Katanga	0	0
 lug	lg	Ganda	0	0
 lui		Luiseno	0	0
 lun		Lunda	0	0
 luo		Luo (Kenya and Tanzania)	0	0
 lus		lushai	0	0
 mac	mk	Macedonian	1	1
 mad		Madurese	0	0
 mag		Magahi	0	0
 mah	mh	Marshallese	0	0
 mai		Maithili	0	0
 mak		Makasar	0	0
 mal	ml	Malayalam	0	0
 man		Mandingo	0	0
 mao	mi	Maori	0	0
 map		Austronesian (Other)	0	0
 mar	mr	Marathi	0	0
 mas		Masai	0	0
 may	ms	Malay	1	1
 mdf		Moksha	0	0
 mdr		Mandar	0	0
 men		Mende	0	0
 mga		Irish, Middle (900-1200)	0	0
 mic		Mi'kmaq	0	0
 min		Minangkabau	0	0
 mis		Miscellaneous languages	0	0
 mkh		Mon-Khmer (Other)	0	0
 mlg	mg	Malagasy	0	0
 mlt	mt	Maltese	0	0
 mnc		Manchu	0	0
 mni		Manipuri	0	0
 mno		Manobo languages	0	0
 moh		Mohawk	0	0
 mol	mo	Moldavian	0	0
 mon	mn	Mongolian	1	0
 mos		Mossi	0	0
 mwl		Mirandese	0	0
 mul		Multiple languages	0	0
 mun		Munda languages	0	0
 mus		Creek	0	0
 mwr		Marwari	0	0
 myn		Mayan languages	0	0
 myv		Erzya	0	0
 nah		Nahuatl	0	0
 nai		North American Indian	0	0
 nap		Neapolitan	0	0
 nau	na	Nauru	0	0
 nav	nv	Navajo	0	0
 nbl	nr	Ndebele, South	0	0
 nde	nd	Ndebele, North	0	0
 ndo	ng	Ndonga	0	0
 nds		Low German	0	0
 nep	ne	Nepali	0	0
 new		Nepal Bhasa	0	0
 nia		Nias	0	0
 nic		Niger-Kordofanian (Other)	0	0
 niu		Niuean	0	0
 nno	nn	Norwegian Nynorsk	0	0
 nob	nb	Norwegian Bokmal	0	0
 nog		Nogai	0	0
 non		Norse, Old	0	0
 nor	no	Norwegian	1	1
 nso		Northern Sotho	0	0
 nub		Nubian languages	0	0
 nwc		Classical Newari	0	0
 nya	ny	Chichewa	0	0
 nym		Nyamwezi	0	0
 nyn		Nyankole	0	0
 nyo		Nyoro	0	0
 nzi		Nzima	0	0
 oci	oc	Occitan	1	1
 oji	oj	Ojibwa	0	0
 ori	or	Oriya	0	0
 orm	om	Oromo	0	0
 osa		Osage	0	0
 oss	os	Ossetian	0	0
 ota		Turkish, Ottoman (1500-1928)	0	0
 oto		Otomian languages	0	0
 paa		Papuan (Other)	0	0
 pag		Pangasinan	0	0
 pal		Pahlavi	0	0
 pam		Pampanga	0	0
 pan	pa	Panjabi	0	0
 pap		Papiamento	0	0
 pau		Palauan	0	0
 peo		Persian, Old (ca.600-400 B.C.)	0	0
 per	fa	Persian	1	1
 phi		Philippine (Other)	0	0
 phn		Phoenician	0	0
 pli	pi	Pali	0	0
 pol	pl	Polish	1	1
 pon		Pohnpeian	0	0
 por	pt	Portuguese	1	1
 pra		Prakrit languages	0	0
 pro		Provençal, Old (to 1500)	0	0
 pus	ps	Pushto	0	0
 que	qu	Quechua	0	0
 raj		Rajasthani	0	0
 rap		Rapanui	0	0
 rar		Rarotongan	0	0
 roa		Romance (Other)	0	0
 roh	rm	Raeto-Romance	0	0
 rom		Romany	0	0
 run	rn	Rundi	0	0
 rup		Aromanian	0	0
 rus	ru	Russian	1	1
 sad		Sandawe	0	0
 sag	sg	Sango	0	0
 sah		Yakut	0	0
 sai		South American Indian (Other)	0	0
 sal		Salishan languages	0	0
 sam		Samaritan Aramaic	0	0
 san	sa	Sanskrit	0	0
 sas		Sasak	0	0
 sat		Santali	0	0
 scc	sr	Serbian	1	1
 scn		Sicilian	0	0
 sco		Scots	0	0
 sel		Selkup	0	0
 sem		Semitic (Other)	0	0
 sga		Irish, Old (to 900)	0	0
 sgn		Sign Languages	0	0
 shn		Shan	0	0
 sid		Sidamo	0	0
 sin	si	Sinhalese	1	1
 sio		Siouan languages	0	0
 sit		Sino-Tibetan (Other)	0	0
 sla		Slavic (Other)	0	0
 slo	sk	Slovak	1	1
 slv	sl	Slovenian	1	1
 sma		Southern Sami	0	0
 sme	se	Northern Sami	0	0
 smi		Sami languages (Other)	0	0
 smj		Lule Sami	0	0
 smn		Inari Sami	0	0
 smo	sm	Samoan	0	0
 sms		Skolt Sami	0	0
 sna	sn	Shona	0	0
 snd	sd	Sindhi	0	0
 snk		Soninke	0	0
 sog		Sogdian	0	0
 som	so	Somali	0	0
 son		Songhai	0	0
 sot	st	Sotho, Southern	0	0
 spa	es	Spanish	1	1
 srd	sc	Sardinian	0	0
 srr		Serer	0	0
 ssa		Nilo-Saharan (Other)	0	0
 ssw	ss	Swati	0	0
 suk		Sukuma	0	0
 sun	su	Sundanese	0	0
 sus		Susu	0	0
 sux		Sumerian	0	0
 swa	sw	Swahili	1	0
 swe	sv	Swedish	1	1
 syr		Syriac	1	0
 tah	ty	Tahitian	0	0
 tai		Tai (Other)	0	0
 tam	ta	Tamil	0	0
 tat	tt	Tatar	0	0
 tel	te	Telugu	0	0
 tem		Timne	0	0
 ter		Tereno	0	0
 tet		Tetum	0	0
 tgk	tg	Tajik	0	0
 tgl	tl	Tagalog	1	1
 tha	th	Thai	1	1
 tib	bo	Tibetan	0	0
 tig		Tigre	0	0
 tir	ti	Tigrinya	0	0
 tiv		Tiv	0	0
 tkl		Tokelau	0	0
 tlh		Klingon	0	0
 tli		Tlingit	0	0
 tmh		Tamashek	0	0
 tog		Tonga (Nyasa)	0	0
 ton	to	Tonga (Tonga Islands)	0	0
 tpi		Tok Pisin	0	0
 tsi		Tsimshian	0	0
 tsn	tn	Tswana	0	0
 tso	ts	Tsonga	0	0
 tuk	tk	Turkmen	0	0
 tum		Tumbuka	0	0
 tup		Tupi languages	0	0
 tur	tr	Turkish	1	1
 tut		Altaic (Other)	0	0
 tvl		Tuvalu	0	0
 twi	tw	Twi	0	0
 tyv		Tuvinian	0	0
 udm		Udmurt	0	0
 uga		Ugaritic	0	0
 uig	ug	Uighur	0	0
 ukr	uk	Ukrainian	1	1
 umb		Umbundu	0	0
 und		Undetermined	0	0
 urd	ur	Urdu	1	0
 uzb	uz	Uzbek	0	0
 vai		Vai	0	0
 ven	ve	Venda	0	0
 vie	vi	Vietnamese	1	1
 vol	vo	Volapük	0	0
 vot		Votic	0	0
 wak		Wakashan languages	0	0
 wal		Walamo	0	0
 war		Waray	0	0
 was		Washo	0	0
 wel	cy	Welsh	0	0
 wen		Sorbian languages	0	0
 wln	wa	Walloon	0	0
 wol	wo	Wolof	0	0
 xal		Kalmyk	0	0
 xho	xh	Xhosa	0	0
 yao		Yao	0	0
 yap		Yapese	0	0
 yid	yi	Yiddish	0	0
 yor	yo	Yoruba	0	0
 ypk		Yupik languages	0	0
 zap		Zapotec	0	0
 zen		Zenaga	0	0
 zha	za	Zhuang	0	0
 znd		Zande	0	0
 zul	zu	Zulu	0	0
 zun		Zuni	0	0
 rum	ro	Romanian	1	1
 pob	pb	Brazilian	1	1
--- a/lib/guessit/test/test_api.py
+++ b/lib/guessit/test/test_api.py
@ -1,54 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
 # Copyright (c) 2014 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
 # GuessIt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # Lesser GNU General Public License for more details.
 #
 # You should have received a copy of the Lesser GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 from __future__ import absolute_import, division, print_function, unicode_literals
 from guessit.test.guessittest import *
 class TestApi(TestGuessit):
    """Checks that guess_file_info agrees with the dedicated guess_*_info helpers."""
    def test_api(self):
        """Compare typed and untyped guesses for a single movie path."""
        path = 'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv'
        # Reference results from the dedicated helpers, computed in a fixed order.
        expected = (
            ('movie', guessit.guess_movie_info(path)),
            ('video', guessit.guess_video_info(path)),
            ('episode', guessit.guess_episode_info(path)),
        )
        untyped_info = guessit.guess_file_info(path)
        # The type may be given as a keyword argument ...
        for kind, info in expected:
            self.assertEqual(guessit.guess_file_info(path, type=kind), info)
        # ... or through the options dictionary.
        for kind, info in expected:
            self.assertEqual(guessit.guess_file_info(path, options={'type': kind}), info)
        # When both are supplied, the result must match the type named in options.
        episode_info = expected[2][1]
        self.assertEqual(guessit.guess_file_info(path, options={'type': 'episode'}, type='movie'), episode_info)
        bare_path = 'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD'
        bare_info = guessit.guess_file_info(bare_path, options={"name_only": True})
        self.assertFalse('container' in bare_info)
        self.assertTrue('container' in untyped_info)
 # Suite built from TestApi; allTests is presumably supplied by the guessittest star import.
 suite = allTests(TestApi)
 # Run the suite verbosely only when this file is executed as a script.
 if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
--- a/lib/guessit/test/test_autodetect.py
+++ b/lib/guessit/test/test_autodetect.py
@ -1,45 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
 # Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
 # GuessIt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # Lesser GNU General Public License for more details.
 #
 # You should have received a copy of the Lesser GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 from __future__ import absolute_import, division, print_function, unicode_literals
 from guessit.test.guessittest import *
 class TestAutoDetect(TestGuessit):
    """Auto-detection checks: degenerate names and the autodetect.yaml file."""
    def testEmpty(self):
        """Names without usable content give an empty or near-empty guess."""
        for blank_name in ('', '___-__'):
            guessed = guessit.guess_file_info(blank_name)
            self.assertEqual(guessed, {})
        # Only the extension is recognisable here.
        guessed = guessit.guess_file_info('__-.avc')
        expected = {'type': 'unknown', 'extension': 'avc'}
        self.assertEqual(guessed, expected)
    def testAutoDetect(self):
        """Run the minimum-fields check on autodetect.yaml with remove_type=False."""
        self.checkMinimumFieldsCorrect(remove_type=False, filename='autodetect.yaml')
 # Suite built from TestAutoDetect via allTests.
 suite = allTests(TestAutoDetect)
 # Script entry point: run the suite with verbose output.
 if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
--- a/lib/guessit/test/test_autodetect_all.py
+++ b/lib/guessit/test/test_autodetect_all.py
@ -1,46 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
 # Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
 # GuessIt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # Lesser GNU General Public License for more details.
 #
 # You should have received a copy of the Lesser GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 from __future__ import absolute_import, division, print_function, unicode_literals
 from guessit.test.guessittest import *
 # Lists handed to checkMinimumFieldsCorrect as exclude_files by the tests in this file;
 # both are currently empty.
 IGNORE_EPISODES = []
 IGNORE_MOVIES = []
 class TestAutoDetectAll(TestGuessit):
    """Runs the minimum-fields check over the three YAML files named below."""
    def testAutoMatcher(self):
        """Check autodetect.yaml with remove_type=False."""
        self.checkMinimumFieldsCorrect(remove_type=False, filename='autodetect.yaml')
    def testAutoMatcherMovies(self):
        """Check movies.yaml, passing IGNORE_MOVIES as exclude_files."""
        self.checkMinimumFieldsCorrect(exclude_files=IGNORE_MOVIES, filename='movies.yaml')
    def testAutoMatcherEpisodes(self):
        """Check episodes.yaml, passing IGNORE_EPISODES as exclude_files."""
        self.checkMinimumFieldsCorrect(exclude_files=IGNORE_EPISODES, filename='episodes.yaml')
 # Suite built from TestAutoDetectAll via allTests.
 suite = allTests(TestAutoDetectAll)
 # Script entry point: run the suite with verbose output.
 if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
--- a/lib/guessit/test/test_doctests.py
+++ b/lib/guessit/test/test_doctests.py
@ -1,45 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
 # Copyright (c) 2014 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
 # GuessIt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # Lesser GNU General Public License for more details.
 #
 # You should have received a copy of the Lesser GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 from __future__ import absolute_import, division, print_function, unicode_literals
 from guessit.test.guessittest import *
 import guessit
 import guessit.hash_ed2k
 import unittest
 import doctest
 def load_tests(loader, tests, ignore):
    """Add the doctests of guessit and several of its submodules to *tests*.

    ``loader`` and ``ignore`` are accepted but not used. The same ``tests``
    object is returned after the doctest suites have been added.
    """
    # The package itself first, then each submodule in the listed order.
    tests.addTests(doctest.DocTestSuite(guessit))
    for submodule in ('date', 'fileutils', 'guess', 'hash_ed2k',
                      'language', 'matchtree', 'textutils'):
        tests.addTests(doctest.DocTestSuite(getattr(guessit, submodule)))
    return tests
 # Start from an empty suite and fill it through load_tests (loader and ignore passed as None).
 suite = unittest.TestSuite()
 load_tests(None, suite, None)
 # TextTestRunner is not imported by name here; presumably it comes from the guessittest star import.
 if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
--- a/lib/guessit/test/test_episode.py
+++ b/lib/guessit/test/test_episode.py
@ -1,35 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
 # Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
 # GuessIt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # Lesser GNU General Public License for more details.
 #
 # You should have received a copy of the Lesser GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 from __future__ import absolute_import, division, print_function, unicode_literals
 from guessit.test.guessittest import *
 class TestEpisode(TestGuessit):
    """Episode guesses checked against episodes.yaml."""
    def testEpisodes(self):
        """Run the minimum-fields check with filetype 'episode'."""
        self.checkMinimumFieldsCorrect(filename='episodes.yaml', filetype='episode')
 # Suite built from TestEpisode via allTests.
 suite = allTests(TestEpisode)
 # Script entry point: run the suite with verbose output.
 if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
--- a/lib/guessit/test/test_hashes.py
+++ b/lib/guessit/test/test_hashes.py
@ -1,46 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
 # Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
 # GuessIt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # Lesser GNU General Public License for more details.
 #
 # You should have received a copy of the Lesser GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 from __future__ import absolute_import, division, print_function, unicode_literals
 from guessit.test.guessittest import *
 class TestHashes(TestGuessit):
    """Checks the hash_* info types of guess_file_info against stored digests."""
    def test_hashes(self):
        """Compute each listed hash for a file next to this module and compare it.

        Every entry is ``(hash_type, filename, expected_value)``; the hash type
        is passed to guess_file_info and is also the key read from the guess.
        """
        hashes = (
                  ('hash_mpc', '1MB', u'8542ad406c15c8bd'),  # TODO: Check if this value is valid
                  ('hash_ed2k', '1MB', u'ed2k://|file|1MB|1048576|AA3CC5552A9931A76B61A41D306735F7|/'),  # TODO: Check if this value is valid
                  ('hash_md5', '1MB', u'5d8dcbca8d8ac21766f28797d6c3954c'),
                  ('hash_sha1', '1MB', u'51d2b8f3248d7ee495b7750c8da5aa3b3819de9d'),
                  ('hash_md5', 'dummy.srt', u'64de6b5893cac24456c46a935ef9c359'),
                  ('hash_sha1', 'dummy.srt', u'a703fc0fa4518080505809bf562c6fc6f7b3c98c')
                  )
        for hash_type, filename, expected_value in hashes:
            guess = guess_file_info(file_in_same_dir(__file__, filename), hash_type)
            computed_value = guess.get(hash_type)
            # Assert on the very value that the failure message reports,
            # instead of looking it up in the guess a second time.
            self.assertEqual(expected_value, computed_value, "Invalid %s for %s: %s != %s" % (hash_type, filename, computed_value, expected_value))
 # Suite built from TestHashes via allTests.
 suite = allTests(TestHashes)
 # Script entry point: run the suite with verbose output.
 if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
--- a/lib/guessit/test/test_language.py
+++ b/lib/guessit/test/test_language.py
@ -1,138 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
 # Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
 # GuessIt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # Lesser GNU General Public License for more details.
 #
 # You should have received a copy of the Lesser GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 from __future__ import absolute_import, division, print_function, unicode_literals
 from guessit.test.guessittest import *
 import io
 class TestLanguage(TestGuessit):
    """Checks that Language treats various names and codes as the same language."""
    def check_languages(self, languages):
        """Assert that each key and its value in *languages* build equal Language objects."""
        for lang1, lang2 in languages.items():
            self.assertEqual(Language(lang1),
                             Language(lang2))
    def test_addic7ed(self):
        """Language names as listed for addic7ed, mapped to short codes."""
        languages = {'English': 'en',
                     'English (US)': 'en',
                     'English (UK)': 'en',
                     'Italian': 'it',
                     'Portuguese': 'pt',
                     'Portuguese (Brazilian)': 'pt',
                     'Romanian': 'ro',
                     'Español (Latinoamérica)': 'es',
                     'Español (España)': 'es',
                     'Spanish (Latin America)': 'es',
                     'Español': 'es',
                     'Spanish': 'es',
                     'Spanish (Spain)': 'es',
                     'French': 'fr',
                     'Greek': 'el',
                     'Arabic': 'ar',
                     'German': 'de',
                     'Croatian': 'hr',
                     'Indonesian': 'id',
                     'Hebrew': 'he',
                     'Russian': 'ru',
                     'Turkish': 'tr',
                     'Swedish': 'se',
                     'Czech': 'cs',
                     'Dutch': 'nl',
                     'Hungarian': 'hu',
                     'Norwegian': 'no',
                     'Polish': 'pl',
                     'Persian': 'fa'}
        self.check_languages(languages)
    def test_subswiki(self):
        """Language names as listed for subswiki."""
        languages = {'English (US)': 'en', 'English (UK)': 'en', 'English': 'en',
                     'French': 'fr', 'Brazilian': 'po', 'Portuguese': 'pt',
                     'Español (Latinoamérica)': 'es', 'Español (España)': 'es',
                     'Español': 'es', 'Italian': 'it', 'Català': 'ca'}
        self.check_languages(languages)
    def test_tvsubtitles(self):
        """Language names and the non-standard short codes listed for tvsubtitles."""
        languages = {'English': 'en', 'Español': 'es', 'French': 'fr', 'German': 'de',
                     'Brazilian': 'br', 'Russian': 'ru', 'Ukrainian': 'ua', 'Italian': 'it',
                     'Greek': 'gr', 'Arabic': 'ar', 'Hungarian': 'hu', 'Polish': 'pl',
                     'Turkish': 'tr', 'Dutch': 'nl', 'Portuguese': 'pt', 'Swedish': 'sv',
                     'Danish': 'da', 'Finnish': 'fi', 'Korean': 'ko', 'Chinese': 'cn',
                     'Japanese': 'jp', 'Bulgarian': 'bg', 'Czech': 'cz', 'Romanian': 'ro'}
        self.check_languages(languages)
    def test_opensubtitles(self):
        """Check the codes from the bundled opensubtitles language file.

        The file is tab-separated with five columns per row; the first row is
        skipped. Only rows whose last two columns are both non-zero are tested.
        """
        opensubtitles_langfile = file_in_same_dir(__file__, 'opensubtitles_languages_2012_05_09.txt')
        # Read all rows inside a with-block so the file handle is closed
        # even when one of the assertions below fails.
        with io.open(opensubtitles_langfile, encoding='utf-8') as langfile:
            rows = [u(line).strip() for line in langfile]
        for row in rows[1:]:
            idlang, alpha2, _, upload_enabled, web_enabled = row.split('\t')
            # do not test languages that are too esoteric / not widely available
            if int(upload_enabled) and int(web_enabled):
                # check that we recognize the opensubtitles language code correctly
                # and that we are able to output this code from a language
                self.assertEqual(idlang, Language(idlang).opensubtitles)
                if alpha2:
                    # check we recognize the opensubtitles 2-letter code correctly
                    self.check_languages({idlang: alpha2})
    def test_tmdb(self):
        """Language-country tags must be returned unchanged by the tmdb attribute."""
        # examples from http://api.themoviedb.org/2.1/language-tags
        for lang in ['en-US', 'en-CA', 'es-MX', 'fr-PF']:
            self.assertEqual(lang, Language(lang).tmdb)
    def test_subtitulos(self):
        """Language names as listed for subtitulos."""
        languages = {'English (US)': 'en', 'English (UK)': 'en', 'English': 'en',
                     'French': 'fr', 'Brazilian': 'po', 'Portuguese': 'pt',
                     'Español (Latinoamérica)': 'es', 'Español (España)': 'es',
                     'Español': 'es', 'Italian': 'it', 'Català': 'ca'}
        self.check_languages(languages)
    def test_thesubdb(self):
        """Two-letter codes as listed for thesubdb; each must equal itself."""
        languages = {'af': 'af', 'cs': 'cs', 'da': 'da', 'de': 'de', 'en': 'en', 'es': 'es', 'fi': 'fi',
                     'fr': 'fr', 'hu': 'hu', 'id': 'id', 'it': 'it', 'la': 'la', 'nl': 'nl', 'no': 'no',
                     'oc': 'oc', 'pl': 'pl', 'pt': 'pt', 'ro': 'ro', 'ru': 'ru', 'sl': 'sl', 'sr': 'sr',
                     'sv': 'sv', 'tr': 'tr'}
        self.check_languages(languages)
    def test_language_object(self):
        """Languages built from two unrecognised names must hash and compare as equal."""
        self.assertEqual(len(list(set([Language('qwerty'), Language('asdf')]))), 1)
        d = {Language('qwerty'): 7}
        d[Language('asdf')] = 23
        self.assertEqual(d[Language('qwerty')], 23)
    def test_exceptions(self):
        """Special-case codes must resolve to the expected languages."""
        self.assertEqual(Language('br'), Language('pt(br)'))
        # languages should be equal regardless of country
        self.assertEqual(Language('br'), Language('pt'))
        self.assertEqual(Language('unknown'), Language('und'))
 # Suite built from TestLanguage via allTests.
 suite = allTests(TestLanguage)
 # Script entry point: run the suite with verbose output.
 if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
--- a/lib/guessit/test/test_main.py
+++ b/lib/guessit/test/test_main.py
@ -1,70 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
 # Copyright (c) 2014 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
 # GuessIt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # Lesser GNU General Public License for more details.
 #
 # You should have received a copy of the Lesser GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 from __future__ import absolute_import, division, print_function, unicode_literals
 from guessit.test.guessittest import *
 from guessit.fileutils import split_path, file_in_same_dir
 from guessit.textutils import strip_brackets, str_replace, str_fill
 from guessit import PY2
 from guessit import __main__
 if PY2:
    from StringIO import StringIO
 else:
    from io import StringIO
 class TestMain(TestGuessit):
    """Calls the command-line entry point with various arguments while stdout is captured."""
    def setUp(self):
        # Keep the real stream so tearDown can restore it, then swap in a buffer.
        self._stdout = sys.stdout
        sys.stdout = StringIO()
    def tearDown(self):
        # Put the original stdout back.
        sys.stdout = self._stdout
    def test_list_properties(self):
        """Call main with -p, then with -l."""
        for argv in (["-p"], ["-l"]):
            __main__.main(argv, False)
    def test_list_transformers(self):
        """Call main with --transformers, alone and combined with -l."""
        for argv in (["--transformers"], ["-l", "--transformers"]):
            __main__.main(argv, False)
    def test_demo(self):
        """Call main with -d, then with -l."""
        for argv in (["-d"], ["-l"]):
            __main__.main(argv, False)
    def test_filename(self):
        """Call main with file names and each of the option combinations below."""
        name_only_calls = (
            ["A.Movie.2014.avi"],
            ["A.Movie.2014.avi", "A.2nd.Movie.2014.avi"],
            ["-y", "A.Movie.2014.avi"],
            ["-a", "A.Movie.2014.avi"],
            ["-v", "A.Movie.2014.avi"],
            ["-t", "movie", "A.Movie.2014.avi"],
            ["-t", "episode", "A.Serie.S02E06.avi"],
        )
        for argv in name_only_calls:
            __main__.main(argv, False)
        # The hash info types are run against the 1MB file next to this module.
        for info_type in ("hash_mpc", "hash_md5"):
            __main__.main(["-i", info_type, file_in_same_dir(__file__, "1MB")], False)
 # Suite built from TestMain via allTests.
 suite = allTests(TestMain)
 # Script entry point: run the suite with verbose output.
 if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
--- a/lib/guessit/test/test_matchtree.py
+++ b/lib/guessit/test/test_matchtree.py
@ -1,93 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
 # Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
 # GuessIt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # Lesser GNU General Public License for more details.
 #
 # You should have received a copy of the Lesser GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 from __future__ import absolute_import, division, print_function, unicode_literals
 from guessit.test.guessittest import *
 from guessit.transfo.guess_release_group import GuessReleaseGroup
 from guessit.transfo.guess_properties import GuessProperties
 from guessit.matchtree import BaseMatchTree
 # Mapping from an input string to the fields expected for it; passed to checkFields in test_match.
 # safe_load is used because the document holds only plain scalars and mappings, and
 # yaml.load without an explicit Loader is deprecated (PyYAML 5.1) and rejected (PyYAML 6).
 keywords = yaml.safe_load("""
 ? Xvid PROPER
 : videoCodec: Xvid
  other: PROPER
 ? PROPER-Xvid
 : videoCodec: Xvid
  other: PROPER
 """)
 def guess_info(string, options=None):
    """Run the release-group and properties transformers on *string*.

    A MatchTree is built from ``string``, each transformer processes it with
    ``options``, and the result of ``matched()`` on the tree is returned.
    """
    tree = MatchTree(string)
    # Release group first, then properties.
    for transformer_class in (GuessReleaseGroup, GuessProperties):
        transformer_class().process(tree, options)
    return tree.matched()
 class TestMatchTree(TestGuessit):
    """Tests for BaseMatchTree partitioning and leaf navigation, plus keyword matching."""
    def test_base_tree(self):
        """Partition a string twice and check the leaves and their neighbours."""
        tree = BaseMatchTree('One Two Three(Three) Four')
        tree.partition((3, 7, 20))
        top = tree.leaves()
        self.assertEqual(top[0].span, (0, 3))
        for index, text in enumerate(('One', ' Two', ' Three(Three)', ' Four')):
            self.assertEqual(text, top[index].value)
        # Split the third leaf further and look at its own leaves.
        top[2].partition((1, 6, 7, 12))
        inner = top[2].leaves()
        for index in (1, 3):
            self.assertEqual('Three', inner[index].value)
        # Leaves of the whole tree after the second partition.
        flat = tree.leaves()
        self.assertEqual(len(flat), 8)
        self.assertEqual(flat[5], inner[3])
        self.assertEqual(tree.previous_leaf(flat[5]), flat[4])
        self.assertEqual(tree.next_leaf(flat[5]), flat[6])
        self.assertEqual(tree.next_leaves(flat[5]), [flat[6], flat[7]])
        self.assertEqual(tree.previous_leaves(flat[5]), [flat[index] for index in (4, 3, 2, 1, 0)])
        # Behaviour at both ends of the leaf sequence.
        self.assertEqual(tree.next_leaf(flat[7]), None)
        self.assertEqual(tree.previous_leaf(flat[0]), None)
        self.assertEqual(tree.next_leaves(flat[7]), [])
        self.assertEqual(tree.previous_leaves(flat[0]), [])
    def test_match(self):
        """Check the module-level keywords mapping using guess_info."""
        self.checkFields(keywords, guess_info)
 # Suite built from TestMatchTree via allTests.
 suite = allTests(TestMatchTree)
 # Script entry point: run the suite with verbose output.
 if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
--- a/lib/guessit/test/test_movie.py
+++ b/lib/guessit/test/test_movie.py
@ -1,35 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
 # Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
 # GuessIt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # Lesser GNU General Public License for more details.
 #
 # You should have received a copy of the Lesser GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 from __future__ import absolute_import, division, print_function, unicode_literals
 from guessit.test.guessittest import *
 class TestMovie(TestGuessit):
    """Movie guesses checked against movies.yaml."""
    def testMovies(self):
        """Run the minimum-fields check with filetype 'movie'."""
        self.checkMinimumFieldsCorrect(filename='movies.yaml', filetype='movie')
 # Suite built from TestMovie via allTests.
 suite = allTests(TestMovie)
 # Script entry point: run the suite with verbose output.
 if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
--- a/lib/guessit/test/test_quality.py
+++ b/lib/guessit/test/test_quality.py
@ -1,126 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
 # Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
 # GuessIt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # Lesser GNU General Public License for more details.
 #
 # You should have received a copy of the Lesser GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 from __future__ import absolute_import, division, print_function, unicode_literals
 from guessit.quality import best_quality, best_quality_properties
 from guessit.containers import QualitiesContainer
 from guessit.test.guessittest import *
 class TestQuality(TestGuessit):
    """Tests for QualitiesContainer and the best_quality helper functions."""
    def test_container(self):
        """Register, rate, compare and unregister qualities on a fresh container."""
        ratings = QualitiesContainer()
        for prop, value, score in (('color', 'red', 10),
                                   ('color', 'orange', 20),
                                   ('color', 'green', 30),
                                   ('context', 'sun', 100),
                                   ('context', 'sea', 200),
                                   ('context', 'sex', 300)):
            ratings.register_quality(prop, value, score)
        red = Guess()
        red['color'] = 'red'
        green = Guess()
        green['color'] = 'green'
        orange = Guess()
        orange['color'] = 'orange'
        orange_score = ratings.rate_quality(orange)
        self.assertEqual(orange_score, 20, "ORANGE should be rated 20. Don't ask why!")
        red_score = ratings.rate_quality(red)
        green_score = ratings.rate_quality(green)
        self.assertTrue(green_score > red_score, "GREEN should be greater than RED. Don't ask why!")
        # Add a second property to both guesses and rate them again.
        red['context'] = 'sex'
        green['context'] = 'sun'
        red_score = ratings.rate_quality(red)
        green_score = ratings.rate_quality(green)
        self.assertTrue(red_score > green_score, "SEX should be greater than SUN. Don't ask why!")
        self.assertEqual(ratings.best_quality(red, green), red, "RED&SEX should be better than GREEN&SUN. Don't ask why!")
        self.assertEqual(ratings.best_quality_properties(['color'], red, green), green, "GREEN should be better than RED. Don't ask why!")
        self.assertEqual(ratings.best_quality_properties(['context'], red, green), red, "SEX should be better than SUN. Don't ask why!")
        # Rating restricted to the 'color' property.
        red_score = ratings.rate_quality(red, 'color')
        green_score = ratings.rate_quality(green, 'color')
        self.assertTrue(green_score > red_score, "GREEN should be greater than RED. Don't ask why!")
        # Unregister the two context values held by the guesses.
        for value in ('sex', 'sun'):
            ratings.unregister_quality('context', value)
        red_score = ratings.rate_quality(red)
        green_score = ratings.rate_quality(green)
        self.assertTrue(green_score > red_score, "GREEN&SUN should be greater than RED&SEX. Don't ask why!")
        orange['context'] = 'sea'
        ratings.unregister_quality('context', 'sea')
        orange_score = ratings.rate_quality(orange, 'context')
        self.assertEqual(orange_score, 0, "Context should be unregistered.")
        # Unregister a whole property by name.
        ratings.unregister_quality('color')
        orange_score = ratings.rate_quality(orange, 'color')
        self.assertEqual(orange_score, 0, "Color should be unregistered.")
        ratings.clear_qualities()
        red_score = ratings.rate_quality(red)
        green_score = ratings.rate_quality(green)
        self.assertTrue(red_score == green_score == 0, "Empty quality container should rate each guess to 0")
    def test_quality_transformers(self):
        """Compare two file-name guesses with the module-level best_quality helpers."""
        lower_res = guessit.guess_file_info("2012.2009.720p.BluRay.x264.DTS WiKi.mkv")
        higher_res = guessit.guess_file_info("2012.2009.1080p.BluRay.x264.MP3 WiKi.mkv")
        self.assertTrue('audioCodec' in lower_res, "audioCodec should be present")
        self.assertTrue('audioCodec' in higher_res, "audioCodec should be present")
        self.assertTrue('screenSize' in lower_res, "screenSize should be present")
        self.assertTrue('screenSize' in higher_res, "screenSize should be present")
        winner = best_quality(lower_res, higher_res)
        self.assertTrue(higher_res == winner, "1080p+MP3 is not the best global quality")
        winner = best_quality_properties(['screenSize'], lower_res, higher_res)
        self.assertTrue(higher_res == winner, "1080p is not the best screenSize")
        winner = best_quality_properties(['audioCodec'], lower_res, higher_res)
        self.assertTrue(lower_res == winner, "DTS is not the best audioCodec")
 # Suite built from TestQuality via allTests.
 suite = allTests(TestQuality)
 # Script entry point: run the suite with verbose output.
 if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
--- a/lib/guessit/test/test_utils.py
+++ b/lib/guessit/test/test_utils.py
@ -1,155 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
 # Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
 # GuessIt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # Lesser GNU General Public License for more details.
 #
 # You should have received a copy of the Lesser GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 from __future__ import absolute_import, division, print_function, unicode_literals
 from guessit.test.guessittest import *
 from guessit.fileutils import split_path
 from guessit.textutils import strip_brackets, str_replace, str_fill, from_camel, is_camel,\
    levenshtein, reorder_title
 from guessit import PY2
 from guessit.date import search_date, search_year
 from datetime import datetime, date, timedelta
 class TestUtils(TestGuessit):
    """Unit tests for the path, text and date helpers used by guessit."""
    def test_splitpath(self):
        """split_path must return the list of components of a path."""
        # expectations are keyed by "are we running on win32?"
        alltests = {
            False: {
                '/usr/bin/smewt': ['/', 'usr', 'bin', 'smewt'],
                'relative_path/to/my_folder/': ['relative_path', 'to', 'my_folder'],
                '//some/path': ['//', 'some', 'path'],
                '//some//path': ['//', 'some', 'path'],
                '///some////path': ['///', 'some', 'path'],
            },
            True: {
                'C:\\Program Files\\Smewt\\smewt.exe': ['C:\\', 'Program Files', 'Smewt', 'smewt.exe'],
                'Documents and Settings\\User\\config': ['Documents and Settings', 'User', 'config'],
                'C:\\Documents and Settings\\User\\config': ['C:\\', 'Documents and Settings', 'User', 'config'],
                # http://bugs.python.org/issue19945
                '\\\\netdrive\\share': ['\\\\', 'netdrive', 'share'] if PY2 else ['\\\\netdrive\\share'],
                '\\\\netdrive\\share\\folder': ['\\\\', 'netdrive', 'share', 'folder'] if PY2 else ['\\\\netdrive\\share\\', 'folder'],
            },
        }
        tests = alltests[sys.platform == 'win32']
        for path, split in tests.items():
            self.assertEqual(split, split_path(path))
    def test_strip_brackets(self):
        """strip_brackets removes one pair of matching brackets, and only matching ones."""
        allTests = (('', ''),
                    ('[test]', 'test'),
                    ('{test2}', 'test2'),
                    ('(test3)', 'test3'),
                    ('(test4]', '(test4]'),
                    )
        for i, e in allTests:
            self.assertEqual(e, strip_brackets(i))
    def test_levenshtein(self):
        """levenshtein returns the edit distance between two strings."""
        self.assertEqual(levenshtein("abcdef ghijk lmno", "abcdef ghijk lmno"), 0)
        self.assertEqual(levenshtein("abcdef ghijk lmnop", "abcdef ghijk lmno"), 1)
        self.assertEqual(levenshtein("abcdef ghijk lmno", "abcdef ghijk lmn"), 1)
        self.assertEqual(levenshtein("abcdef ghijk lmno", "abcdef ghijk lmnp"), 1)
        self.assertEqual(levenshtein("abcdef ghijk lmno", "abcdef ghijk lmnq"), 1)
        self.assertEqual(levenshtein("cbcdef ghijk lmno", "abcdef ghijk lmnq"), 2)
        self.assertEqual(levenshtein("cbcdef ghihk lmno", "abcdef ghijk lmnq"), 3)
    def test_reorder_title(self):
        """reorder_title moves a trailing article back to the front of the title."""
        self.assertEqual(reorder_title("Simpsons, The"), "The Simpsons")
        self.assertEqual(reorder_title("Simpsons,The"), "The Simpsons")
        self.assertEqual(reorder_title("Simpsons,Les", articles=('the', 'le', 'la', 'les')), "Les Simpsons")
        self.assertEqual(reorder_title("Simpsons, Les", articles=('the', 'le', 'la', 'les')), "Les Simpsons")
    def test_camel(self):
        """from_camel / is_camel, plus the str_replace and str_fill helpers."""
        self.assertEqual("", from_camel(""))
        self.assertEqual("Hello world", str_replace("Hello World", 6, 'w'))
        self.assertEqual("Hello *****", str_fill("Hello World", (6, 11), '*'))
        # this used to be assertTrue(expected, actual), which can never fail
        # because the second argument is only the failure message
        self.assertEqual("This is camel", from_camel("ThisIsCamel"))
        self.assertEqual('camel case', from_camel('camelCase'))
        self.assertEqual('A case', from_camel('ACase'))
        self.assertEqual('MiXedCaSe is not camel case', from_camel('MiXedCaSe is not camelCase'))
        self.assertEqual("This is camel cased title", from_camel("ThisIsCamelCasedTitle"))
        self.assertEqual("This is camel CASED title", from_camel("ThisIsCamelCASEDTitle"))
        self.assertEqual("These are camel CASED title", from_camel("TheseAreCamelCASEDTitle"))
        self.assertEqual("Give a camel case string", from_camel("GiveACamelCaseString"))
        self.assertEqual("Death TO camel case", from_camel("DeathTOCamelCase"))
        self.assertEqual("But i like java too:)", from_camel("ButILikeJavaToo:)"))
        self.assertEqual("Beatdown french DVD rip.mkv", from_camel("BeatdownFrenchDVDRip.mkv"))
        self.assertEqual("DO NOTHING ON UPPER CASE", from_camel("DO NOTHING ON UPPER CASE"))
        self.assertFalse(is_camel("this_is_not_camel"))
        self.assertTrue(is_camel("ThisIsCamel"))
        self.assertEqual("Dark.City.(1998).DC.BDRIP.720p.DTS.X264-CHD.mkv", from_camel("Dark.City.(1998).DC.BDRIP.720p.DTS.X264-CHD.mkv"))
        self.assertFalse(is_camel("Dark.City.(1998).DC.BDRIP.720p.DTS.X264-CHD.mkv"))
        self.assertEqual("A2LiNE", from_camel("A2LiNE"))
    def test_date(self):
        """search_year / search_date return (value, span), or (None, None) when nothing is found."""
        self.assertEqual(search_year(' in the year 2000... '), (2000, (13, 17)))
        self.assertEqual(search_year(' they arrived in 1492. '), (None, None))
        today = date.today()
        # keep the two-digit years as text: converting them to int dropped a
        # leading zero (e.g. 2005 -> 5), making the formatted date one char
        # shorter than the (18, 26) span asserted below
        today_year_2 = str(today.year)[2:]
        future = today + timedelta(days=1000)
        future_year_2 = str(future.year)[2:]
        past = today - timedelta(days=10000)
        past_year_2 = str(past.year)[2:]
        self.assertEqual(search_date(' Something before 2002-04-22 '), (date(2002, 4, 22), (18, 28)))
        self.assertEqual(search_date(' 2002-04-22 Something after '), (date(2002, 4, 22), (1, 11)))
        self.assertEqual(search_date(' This happened on 2002-04-22. '), (date(2002, 4, 22), (18, 28)))
        self.assertEqual(search_date(' This happened on 22-04-2002. '), (date(2002, 4, 22), (18, 28)))
        self.assertEqual(search_date(' This happened on 13-04-%s. ' % (today_year_2,)), (date(today.year, 4, 13), (18, 26)))
        self.assertEqual(search_date(' This happened on 22-04-%s. ' % (future_year_2,)), (date(future.year, 4, 22), (18, 26)))
        self.assertEqual(search_date(' This happened on 20-04-%s. ' % (past_year_2,)), (date(past.year, 4, 20), (18, 26)))
        self.assertEqual(search_date(' This happened on 04-13-%s. ' % (today_year_2,)), (date(today.year, 4, 13), (18, 26)))
        self.assertEqual(search_date(' This happened on 04-22-%s. ' % (future_year_2,)), (date(future.year, 4, 22), (18, 26)))
        self.assertEqual(search_date(' This happened on 04-20-%s. ' % (past_year_2,)), (date(past.year, 4, 20), (18, 26)))
        self.assertEqual(search_date(' This happened on 35-12-%s. ' % (today_year_2,)), (None, None))
        self.assertEqual(search_date(' This happened on 37-18-%s. ' % (future_year_2,)), (None, None))
        self.assertEqual(search_date(' This happened on 44-42-%s. ' % (past_year_2,)), (None, None))
        self.assertEqual(search_date(' This happened on %s. ' % (today, )), (today, (18, 28)))
        self.assertEqual(search_date(' This happened on %s. ' % (future, )), (future, (18, 28)))
        self.assertEqual(search_date(' This happened on %s. ' % (past, )), (past, (18, 28)))
        self.assertEqual(search_date(' released date: 04-03-1901? '), (None, None))
        self.assertEqual(search_date(' There\'s no date in here. '), (None, None))
 # Module-level suite built from TestUtils with the allTests helper; it is
 # run with verbose output only when this file is executed as a script.
 suite = allTests(TestUtils)
 if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
--- a/lib/guessit/textutils.py
+++ b/lib/guessit/textutils.py
@ -1,25 +1,24 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
-# GuessIt - A library for guessing information from filenames
+# Smewt - A smart collection manager
-# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2008-2012 Nicolas Wack <wackou@gmail.com>
 #
-# GuessIt is free software; you can redistribute it and/or modify it under
+# Smewt is free software; you can redistribute it and/or modify
-# the terms of the Lesser GNU General Public License as published by
+# it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
-# GuessIt is distributed in the hope that it will be useful,
+# Smewt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# Lesser GNU General Public License for more details.
+# GNU General Public License for more details.
 #
-# You should have received a copy of the Lesser GNU General Public License
+# You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, unicode_literals
+from __future__ import unicode_literals
 from guessit import s
 from guessit.patterns import sep
 import functools
@ -28,7 +27,6 @@ import re
 # string-related functions
 def normalize_unicode(s):
    return unicodedata.normalize('NFC', s)
@ -45,36 +43,19 @@ def strip_brackets(s):
    return s
 _dotted_rexp = re.compile(r'(?:\W|^)(([A-Za-z]\.){2,}[A-Za-z]\.?)')
 def clean_string(st):
    for c in sep:
        # do not remove certain chars
        if c in ['-', ',']:
            continue
        if c == '.':
            # we should not remove the dots for acronyms and such
            dotted = _dotted_rexp.search(st)
            if dotted:
                s = dotted.group(1)
                exclude_begin, exclude_end = dotted.span(1)
                st = (st[:exclude_begin].replace(c, ' ') +
                      st[exclude_begin:exclude_end] +
                      st[exclude_end:].replace(c, ' '))
                continue
        st = st.replace(c, ' ')
    parts = st.split()
    result = ' '.join(p for p in parts if p != '')
    # now also remove dashes on the outer part of the string
-    while result and result[0] in '-':
+    while result and result[0] in sep:
        result = result[1:]
-    while result and result[-1] in '-':
+    while result and result[-1] in sep:
        result = result[:-1]
    return result
@ -82,23 +63,21 @@ def clean_string(st):
 _words_rexp = re.compile('\w+', re.UNICODE)
 def find_words(s):
    return _words_rexp.findall(s.replace('_', ' '))
-def reorder_title(title, articles=('the',), separators=(',', ', ')):
+def reorder_title(title):
    ltitle = title.lower()
-    for article in articles:
+    if ltitle[-4:] == ',the':
-        for separator in separators:
+        return title[-3:] + ' ' + title[:-4]
-            suffix = separator + article
+    if ltitle[-5:] == ', the':
-            if ltitle[-len(suffix):] == suffix:
+        return title[-3:] + ' ' + title[:-5]
                return title[-len(suffix) + len(separator):] + ' ' + title[:-len(suffix)]
    return title
 def str_replace(string, pos, c):
-    return string[:pos] + c + string[pos + 1:]
+    return string[:pos] + c + string[pos+1:]
 def str_fill(string, region, c):
@ -106,6 +85,7 @@ def str_fill(string, region, c):
    return string[:start] + c * (end - start) + string[end:]
 def levenshtein(a, b):
    if not a:
        return len(b)
@ -115,25 +95,25 @@ def levenshtein(a, b):
    m = len(a)
    n = len(b)
    d = []
-    for i in range(m + 1):
+    for i in range(m+1):
-        d.append([0] * (n + 1))
+        d.append([0] * (n+1))
-    for i in range(m + 1):
+    for i in range(m+1):
        d[i][0] = i
-    for j in range(n + 1):
+    for j in range(n+1):
        d[0][j] = j
-    for i in range(1, m + 1):
+    for i in range(1, m+1):
-        for j in range(1, n + 1):
+        for j in range(1, n+1):
-            if a[i - 1] == b[j - 1]:
+            if a[i-1] == b[j-1]:
                cost = 0
            else:
                cost = 1
-            d[i][j] = min(d[i - 1][j] + 1,  # deletion
+            d[i][j] = min(d[i-1][j] + 1,     # deletion
-                          d[i][j - 1] + 1,  # insertion
+                          d[i][j-1] + 1,     # insertion
-                          d[i - 1][j - 1] + cost  # substitution
+                          d[i-1][j-1] + cost # substitution
                          )
    return d[m][n]
@ -160,7 +140,7 @@ def find_first_level_groups_span(string, enclosing):
    [(2, 5), (7, 10)]
    """
    opening, closing = enclosing
-    depth = []  # depth is a stack of indices where we opened a group
+    depth = [] # depth is a stack of indices where we opened a group
    result = []
    for i, c, in enumerate(string):
        if c == opening:
@ -171,7 +151,7 @@ def find_first_level_groups_span(string, enclosing):
                end = i
                if not depth:
                    # we emptied our stack, so we have a 1st level group
-                    result.append((start, end + 1))
+                    result.append((start, end+1))
            except IndexError:
                # we closed a group which was not opened before
                pass
@ -192,7 +172,7 @@ def split_on_groups(string, groups):
    """
    if not groups:
-        return [string]
+        return [ string ]
    boundaries = sorted(set(functools.reduce(lambda l, x: l + list(x), groups, [])))
    if boundaries[0] != 0:
@ -200,10 +180,10 @@ def split_on_groups(string, groups):
    if boundaries[-1] != len(string):
        boundaries.append(len(string))
-    groups = [string[start:end] for start, end in zip(boundaries[:-1],
+    groups = [ string[start:end] for start, end in zip(boundaries[:-1],
-                                                       boundaries[1:])]
+                                                       boundaries[1:]) ]
-    return [g for g in groups if g]  # return only non-empty groups
+    return [ g for g in groups if g ] # return only non-empty groups
 def find_first_level_groups(string, enclosing, blank_sep=None):
@ -239,114 +219,6 @@ def find_first_level_groups(string, enclosing, blank_sep=None):
    if blank_sep:
        for start, end in groups:
            string = str_replace(string, start, blank_sep)
-            string = str_replace(string, end - 1, blank_sep)
+            string = str_replace(string, end-1, blank_sep)
    return split_on_groups(string, groups)
 _camel_word2_set = set(('is', 'to',))
 _camel_word3_set = set(('the',))
 def _camel_split_and_lower(string, i):
        """Retrieves a tuple (need_split, need_lower) for the char at string[i]
        need_split is True if this char is a first letter in a camelCasedString.
        need_lower is True if this char should be lowercased.
        The decision only looks at the case of the char and of its two
        neighbours on each side.
        """
        def islower(c):
            # a letter that is not uppercase; digits and punctuation are neither
            return c.isalpha() and not c.isupper()
        # neighbouring chars, None when they fall outside of the string
        previous_char2 = string[i - 2] if i > 1 else None
        previous_char = string[i - 1] if i > 0 else None
        char = string[i]
        next_char = string[i + 1] if i + 1 < len(string) else None
        next_char2 = string[i + 2] if i + 2 < len(string) else None
        # case flags; a missing neighbour counts as neither upper nor lower
        char_upper = char.isupper()
        char_lower = islower(char)
        # previous_char2_lower = islower(previous_char2) if previous_char2 else False
        previous_char2_upper = previous_char2.isupper() if previous_char2 else False
        previous_char_lower = islower(previous_char) if previous_char else False
        previous_char_upper = previous_char.isupper() if previous_char else False
        next_char_upper = next_char.isupper() if next_char else False
        next_char_lower = islower(next_char) if next_char else False
        next_char2_upper = next_char2.isupper() if next_char2 else False
        # next_char2_lower = islower(next_char2) if next_char2 else False
        # the char is part of an alternating-case sequence such as "MiXed":
        #  - lowercase char between two uppercase ones, or
        #  - uppercase char after a lowercase one, followed by lower then upper, or
        #  - uppercase char preceded by upper then lower
        mixedcase_word = (previous_char_upper and char_lower and next_char_upper) or \
                        (previous_char_lower and char_upper and next_char_lower and next_char2_upper) or \
                        (previous_char2_upper and previous_char_lower and char_upper)
        if mixedcase_word:
            # exception: a known short word starting at this char, or a known
            # 2-letter word just before it (e.g. the "Is" in "ThisIsCamel"),
            # means this is regular camel case after all
            word2 = (char + next_char).lower() if next_char else None
            word3 = (char + next_char + next_char2).lower() if next_char and next_char2 else None
            word2b = (previous_char2 + previous_char).lower() if previous_char2 and previous_char else None
            if word2 in _camel_word2_set or word2b in _camel_word2_set or word3 in _camel_word3_set:
                mixedcase_word = False
        # the char sits inside (or at the start of) a run of 3 uppercase chars
        uppercase_word = previous_char_upper and char_upper and next_char_upper or (char_upper and next_char_upper and next_char2_upper)
        # regular boundary: lowercase char followed by an uppercase one
        need_split = char_upper and previous_char_lower and not mixedcase_word
        if not need_split:
            # second chance: last uppercase char of an uppercase run, when it is
            # followed by a lowercase one (e.g. the "R" in "DVDRip").
            # Both flags are recomputed here with the same values as above.
            previous_char_upper = string[i - 1].isupper() if i > 0 else False
            next_char_lower = (string[i + 1].isalpha() and not string[i + 1].isupper()) if i + 1 < len(string) else False
            need_split = char_upper and previous_char_upper and next_char_lower
            # in this branch the uppercase-run flag is overridden
            uppercase_word = previous_char_upper and not next_char_lower
        # only the first letter of a newly split regular word gets lowercased
        need_lower = not uppercase_word and not mixedcase_word and need_split
        return (need_split, need_lower)
 def is_camel(string):
    """Tell whether at least one char of `string` starts a new camelCase word.
    >>> is_camel('dogEATDog')
    True
    >>> is_camel('DeathToCamelCase')
    True
    >>> is_camel('death_to_camel_case')
    False
    >>> is_camel('TheBest')
    True
    >>> is_camel('The Best')
    False
    """
    # first element of the helper's result is the need_split flag
    return any(_camel_split_and_lower(string, pos)[0]
               for pos in range(len(string)))
 def from_camel(string):
    """Insert spaces at camelCase word boundaries, lowercasing the split letters
    for which the helper asks for it. An empty value is returned unchanged.
    >>> from_camel('dogEATDog') == 'dog EAT dog'
    True
    >>> from_camel('DeathToCamelCase') == 'Death to camel case'
    True
    >>> from_camel('TheBest') == 'The best'
    True
    >>> from_camel('MiXedCaSe is not camelCase') == 'MiXedCaSe is not camel case'
    True
    """
    if not string:
        return string
    out = []
    for pos, letter in enumerate(string):
        need_split, need_lower = _camel_split_and_lower(string, pos)
        if need_split:
            # a new word starts here
            out.append(' ')
        out.append(letter.lower() if need_lower else letter)
    return ''.join(out)
--- a/lib/guessit/tlds-alpha-by-domain.txt
+++ b/lib/guessit/tlds-alpha-by-domain.txt
@ -1,341 +0,0 @@
 # Version 2013112900, Last Updated Fri Nov 29 07:07:01 2013 UTC
 AC
 AD
 AE
 AERO
 AF
 AG
 AI
 AL
 AM
 AN
 AO
 AQ
 AR
 ARPA
 AS
 ASIA
 AT
 AU
 AW
 AX
 AZ
 BA
 BB
 BD
 BE
 BF
 BG
 BH
 BI
 BIKE
 BIZ
 BJ
 BM
 BN
 BO
 BR
 BS
 BT
 BV
 BW
 BY
 BZ
 CA
 CAMERA
 CAT
 CC
 CD
 CF
 CG
 CH
 CI
 CK
 CL
 CLOTHING
 CM
 CN
 CO
 COM
 CONSTRUCTION
 CONTRACTORS
 COOP
 CR
 CU
 CV
 CW
 CX
 CY
 CZ
 DE
 DIAMONDS
 DIRECTORY
 DJ
 DK
 DM
 DO
 DZ
 EC
 EDU
 EE
 EG
 ENTERPRISES
 EQUIPMENT
 ER
 ES
 ESTATE
 ET
 EU
 FI
 FJ
 FK
 FM
 FO
 FR
 GA
 GALLERY
 GB
 GD
 GE
 GF
 GG
 GH
 GI
 GL
 GM
 GN
 GOV
 GP
 GQ
 GR
 GRAPHICS
 GS
 GT
 GU
 GURU
 GW
 GY
 HK
 HM
 HN
 HOLDINGS
 HR
 HT
 HU
 ID
 IE
 IL
 IM
 IN
 INFO
 INT
 IO
 IQ
 IR
 IS
 IT
 JE
 JM
 JO
 JOBS
 JP
 KE
 KG
 KH
 KI
 KITCHEN
 KM
 KN
 KP
 KR
 KW
 KY
 KZ
 LA
 LAND
 LB
 LC
 LI
 LIGHTING
 LK
 LR
 LS
 LT
 LU
 LV
 LY
 MA
 MC
 MD
 ME
 MG
 MH
 MIL
 MK
 ML
 MM
 MN
 MO
 MOBI
 MP
 MQ
 MR
 MS
 MT
 MU
 MUSEUM
 MV
 MW
 MX
 MY
 MZ
 NA
 NAME
 NC
 NE
 NET
 NF
 NG
 NI
 NL
 NO
 NP
 NR
 NU
 NZ
 OM
 ORG
 PA
 PE
 PF
 PG
 PH
 PHOTOGRAPHY
 PK
 PL
 PLUMBING
 PM
 PN
 POST
 PR
 PRO
 PS
 PT
 PW
 PY
 QA
 RE
 RO
 RS
 RU
 RW
 SA
 SB
 SC
 SD
 SE
 SEXY
 SG
 SH
 SI
 SINGLES
 SJ
 SK
 SL
 SM
 SN
 SO
 SR
 ST
 SU
 SV
 SX
 SY
 SZ
 TATTOO
 TC
 TD
 TECHNOLOGY
 TEL
 TF
 TG
 TH
 TIPS
 TJ
 TK
 TL
 TM
 TN
 TO
 TODAY
 TP
 TR
 TRAVEL
 TT
 TV
 TW
 TZ
 UA
 UG
 UK
 US
 UY
 UZ
 VA
 VC
 VE
 VENTURES
 VG
 VI
 VN
 VOYAGE
 VU
 WF
 WS
 XN--3E0B707E
 XN--45BRJ9C
 XN--80AO21A
 XN--80ASEHDB
 XN--80ASWG
 XN--90A3AC
 XN--CLCHC0EA0B2G2A9GCD
 XN--FIQS8S
 XN--FIQZ9S
 XN--FPCRJ9C3D
 XN--FZC2C9E2C
 XN--GECRJ9C
 XN--H2BRJ9C
 XN--J1AMH
 XN--J6W193G
 XN--KPRW13D
 XN--KPRY57D
 XN--L1ACC
 XN--LGBBAT1AD8J
 XN--MGB9AWBF
 XN--MGBA3A4F16A
 XN--MGBAAM7A8H
 XN--MGBAYH7GPA
 XN--MGBBH1A71E
 XN--MGBC0A9AZCG
 XN--MGBERP4A5D4AR
 XN--MGBX4CD0AB
 XN--NGBC5AZD
 XN--O3CW4H
 XN--OGBPF8FL
 XN--P1AI
 XN--PGBS0DH
 XN--Q9JYB4C
 XN--S9BRJ9C
 XN--UNUP4Y
 XN--WGBH1C
 XN--WGBL6A
 XN--XKC2AL3HYE2A
 XN--XKC2DL3A5EE0H
 XN--YFRO4I67O
 XN--YGBI2AMMX
 XXX
 YE
 YT
 ZA
 ZM
 ZW
--- a/lib/guessit/transfo/init.py
+++ b/lib/guessit/transfo/init.py
@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@ -18,13 +18,92 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, unicode_literals
+from __future__ import unicode_literals
 from guessit import base_text_type, Guess
 from guessit.patterns import canonical_form
 from guessit.textutils import clean_string
 import logging
 log = logging.getLogger(__name__)
-class TransformerException(Exception):
+def found_property(node, name, confidence):
-    def __init__(self, transformer, message):
+    node.guess = Guess({name: node.clean_value}, confidence=confidence, raw=node.value)
    log.debug('Found with confidence %.2f: %s' % (confidence, node.guess))
        # Call the base class constructor with the parameters it needs
        Exception.__init__(self, message)
-        self.transformer = transformer
+def format_guess(guess):
    """Format all the found values to their natural type.
    For instance, a year would be stored as an int value, etc...
    Note that this modifies the dictionary given as input.
    """
    for prop, value in guess.items():
        if prop in ('season', 'episodeNumber', 'year', 'cdNumber',
                    'cdNumberTotal', 'bonusNumber', 'filmNumber'):
            guess[prop] = int(guess[prop])
        elif isinstance(value, base_text_type):
            if prop in ('edition',):
                value = clean_string(value)
            guess[prop] = canonical_form(value).replace('\\', '')
    return guess
 def find_and_split_node(node, strategy, logger):
    """Run the matchers of `strategy` on the node value until one of them matches.
    `strategy` is an iterable of (matcher, confidence, args, kwargs) tuples. On
    the first match, the node is partitioned around the matched span, the
    resulting guess is attached to the child covering exactly that span, and
    the other children are searched recursively with the same strategy.
    Nothing is returned; the node tree is modified in place.
    """
    padded = ' %s ' % node.value # add sentinels
    for matcher, confidence, args, kwargs in strategy:
        call_args = [padded]
        if getattr(matcher, 'use_node', False):
            call_args.append(node)
        if args:
            # NOTE(review): the args tuple is handed over as one single
            # positional argument, not unpacked
            call_args.append(args)
        if kwargs:
            result, span = matcher(*call_args, **kwargs)
        else:
            result, span = matcher(*call_args)
        if not result:
            # no match, try the next matcher
            continue
        # readjust span to compensate for sentinels
        span = (span[0] - 1, span[1] - 1)
        if confidence is None:
            # no confidence imposed by the strategy: take it from the Guess
            # (that of its first key), or default to 1.0 for a plain result
            if isinstance(result, Guess):
                confidence = result.confidence(list(result.keys())[0])
            else:
                confidence = 1.0
        matched_text = padded[span[0] + 1:span[1] + 1]
        guess = format_guess(Guess(result, confidence=confidence, raw=matched_text))
        (logger or log).debug('Found with confidence %.2f: %s' % (confidence, guess))
        node.partition(span)
        absolute_span = (span[0] + node.offset, span[1] + node.offset)
        for child in node.children:
            if child.span == absolute_span:
                child.guess = guess
            else:
                find_and_split_node(child, strategy, logger)
        # only the first matching matcher is applied to this node
        return
 class SingleNodeGuesser(object):
    """Applies a single guessing function to the unidentified leaves of a match tree."""
    def __init__(self, guess_func, confidence, logger, *args, **kwargs):
        """Store the guesser, its confidence, the logger and any extra
        positional/keyword arguments to be handed over with the guesser."""
        self.args, self.kwargs = args, kwargs
        self.logger = logger
        self.confidence = confidence
        self.guess_func = guess_func
    def process(self, mtree):
        """Call find_and_split_node on each unidentified leaf of `mtree`.
        The strategy is a list of (guesser, confidence, args, kwargs) entries;
        here it holds the single guesser of this instance:
         - if the guesser returns a guessit.Guess and confidence is specified,
           it will override it, otherwise it will leave the guess confidence
         - if the guesser returns a simple dict as a guess and confidence is
           specified, it will use it, or 1.0 otherwise
        """
        only_entry = (self.guess_func, self.confidence, self.args, self.kwargs)
        strategy = [only_entry]
        for leaf in mtree.unidentified_leaves():
            find_and_split_node(leaf, strategy, self.logger)
--- a/lib/guessit/transfo/guess_bonus_features.py
+++ b/lib/guessit/transfo/guess_bonus_features.py
@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@ -18,50 +18,44 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, unicode_literals
+from __future__ import unicode_literals
 from guessit.transfo import found_property
 import logging
-from guessit.plugins.transformers import Transformer
+log = logging.getLogger(__name__)
 from guessit.matcher import found_property
-class GuessBonusFeatures(Transformer):
+def process(mtree):
-    def __init__(self):
+    def previous_group(g):
-        Transformer.__init__(self, -150)
+        for leaf in mtree.unidentified_leaves()[::-1]:
            if leaf.node_idx < g.node_idx:
                return leaf
-    def supported_properties(self):
+    def next_group(g):
-        return ['bonusNumber', 'bonusTitle', 'filmNumber', 'filmSeries', 'title', 'series']
+        for leaf in mtree.unidentified_leaves():
            if leaf.node_idx > g.node_idx:
                return leaf
-    def process(self, mtree, options=None):
+    def same_group(g1, g2):
-        def previous_group(g):
+        return g1.node_idx[:2] == g2.node_idx[:2]
            for leaf in mtree.unidentified_leaves()[::-1]:
                if leaf.node_idx < g.node_idx:
                    return leaf
-        def next_group(g):
+    bonus = [ node for node in mtree.leaves() if 'bonusNumber' in node.guess ]
-            for leaf in mtree.unidentified_leaves():
+    if bonus:
-                if leaf.node_idx > g.node_idx:
+        bonusTitle = next_group(bonus[0])
-                    return leaf
+        if same_group(bonusTitle, bonus[0]):
            found_property(bonusTitle, 'bonusTitle', 0.8)
-        def same_group(g1, g2):
+    filmNumber = [ node for node in mtree.leaves()
-            return g1.node_idx[:2] == g2.node_idx[:2]
+                   if 'filmNumber' in node.guess ]
    if filmNumber:
        filmSeries = previous_group(filmNumber[0])
        found_property(filmSeries, 'filmSeries', 0.9)
-        bonus = [node for node in mtree.leaves() if 'bonusNumber' in node.guess]
+        title = next_group(filmNumber[0])
-        if bonus:
+        found_property(title, 'title', 0.9)
            bonusTitle = next_group(bonus[0])
            if bonusTitle and same_group(bonusTitle, bonus[0]):
                found_property(bonusTitle, 'bonusTitle', confidence=0.8)
-        filmNumber = [node for node in mtree.leaves()
+    season = [ node for node in mtree.leaves() if 'season' in node.guess ]
-                       if 'filmNumber' in node.guess]
+    if season and 'bonusNumber' in mtree.info:
-        if filmNumber:
+        series = previous_group(season[0])
-            filmSeries = previous_group(filmNumber[0])
+        if same_group(series, season[0]):
-            found_property(filmSeries, 'filmSeries', confidence=0.9)
+            found_property(series, 'series', 0.9)
            title = next_group(filmNumber[0])
            found_property(title, 'title', confidence=0.9)
        season = [node for node in mtree.leaves() if 'season' in node.guess]
        if season and 'bonusNumber' in mtree.info:
            series = previous_group(season[0])
            if same_group(series, season[0]):
                found_property(series, 'series', confidence=0.9)
--- a/lib/guessit/transfo/guess_country.py
+++ b/lib/guessit/transfo/guess_country.py
@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@ -18,52 +18,31 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, unicode_literals
+from __future__ import unicode_literals
 from guessit.plugins.transformers import Transformer
 from guessit.country import Country
 from guessit import Guess
 import logging
 log = logging.getLogger(__name__)
-class GuessCountry(Transformer):
+# list of common words which could be interpreted as countries, but which
-    def __init__(self):
+# are far too common to be able to say they represent a country
-        Transformer.__init__(self, -170)
+country_common_words = frozenset([ 'bt', 'bb' ])
        # list of common words which could be interpreted as countries, but which
        # are far too common to be able to say they represent a country
        self.country_common_words = frozenset(['bt', 'bb'])
-    def supported_properties(self):
+def process(mtree):
-        return ['country']
+    for node in mtree.unidentified_leaves():
        if len(node.node_idx) == 2:
            c = node.value[1:-1].lower()
            if c in country_common_words:
                continue
-    def should_process(self, mtree, options=None):
+            # only keep explicit groups (enclosed in parentheses/brackets)
-        options = options or {}
+            if node.value[0] + node.value[-1] not in ['()', '[]', '{}']:
-        return 'nocountry' not in options.keys()
+                continue
-    def process(self, mtree, options=None):
+            try:
-        for node in mtree.unidentified_leaves():
+                country = Country(c, strict=True)
-            if len(node.node_idx) == 2:
+            except ValueError:
-                c = node.value[1:-1].lower()
+                continue
                if c in self.country_common_words:
                    continue
-                # only keep explicit groups (enclosed in parentheses/brackets)
+            node.guess = Guess(country=country, confidence=1.0, raw=c)
                if not node.is_explicit():
                    continue
                try:
                    country = Country(c, strict=True)
                except ValueError:
                    continue
                node.guess = Guess(country=country, confidence=1.0, input=node.value, span=node.span)
    def post_process(self, mtree, options=None, *args, **kwargs):
        # if country is in the guessed properties, make it part of the series name
        series_leaves = mtree.leaves_containing('series')
        country_leaves = mtree.leaves_containing('country')
        if series_leaves and country_leaves:
            country_leaf = country_leaves[0]
            for serie_leaf in series_leaves:
                serie_leaf.guess['series'] += ' (%s)' % country_leaf.guess['country'].alpha2.upper()
            #result['series'] += ' (%s)' % result['country'].alpha2.upper()
--- a/lib/guessit/transfo/guess_date.py
+++ b/lib/guessit/transfo/guess_date.py
@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@ -18,26 +18,21 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, unicode_literals
+from __future__ import unicode_literals
-
+from guessit.transfo import SingleNodeGuesser
 from guessit.plugins.transformers import Transformer
 from guessit.matcher import GuessFinder
 from guessit.date import search_date
 import logging
 log = logging.getLogger(__name__)
-class GuessDate(Transformer):
+def guess_date(string):
-    def __init__(self):
+    date, span = search_date(string)
-        Transformer.__init__(self, 50)
+    if date:
        return { 'date': date }, span
    else:
        return None, None
    def supported_properties(self):
        """Return the list of property names reported as supported: ['date']."""
        return ['date']
-    def guess_date(self, string, node=None, options=None):
+def process(mtree):
-        date, span = search_date(string)
+    SingleNodeGuesser(guess_date, 1.0, log).process(mtree)
        if date:
            return {'date': date}, span
        else:
            return None, None
    def process(self, mtree, options=None):
        """Apply self.guess_date to the unidentified leaves of mtree.

        A GuessFinder is built from self.guess_date, the value 1.0
        (NOTE(review): presumably the confidence given to found dates --
        confirm against GuessFinder), self.log and options; it is then
        handed ``mtree.unidentified_leaves()``.
        """
        finder = GuessFinder(self.guess_date, 1.0, self.log, options)
        leaves = mtree.unidentified_leaves()
        finder.process_nodes(leaves)
--- a/lib/guessit/transfo/guess_episode_info_from_position.py
+++ b/lib/guessit/transfo/guess_episode_info_from_position.py
@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@ -18,145 +18,129 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, unicode_literals
+from __future__ import unicode_literals
 from guessit.transfo import found_property
 from guessit.patterns import non_episode_title, unlikely_series
 import logging
-from guessit.plugins.transformers import Transformer, get_transformer
+log = logging.getLogger(__name__)
 from guessit.textutils import reorder_title
 from guessit.matcher import found_property
-class GuessEpisodeInfoFromPosition(Transformer):
+def match_from_epnum_position(mtree, node):
-    def __init__(self):
+    epnum_idx = node.node_idx
        Transformer.__init__(self, -200)
-    def supported_properties(self):
+    # a few helper functions to be able to filter using high-level semantics
-        return ['title', 'series']
+    def before_epnum_in_same_pathgroup():
        return [ leaf for leaf in mtree.unidentified_leaves()
                 if (leaf.node_idx[0] == epnum_idx[0] and
                     leaf.node_idx[1:] < epnum_idx[1:]) ]
-    def match_from_epnum_position(self, mtree, node):
+    def after_epnum_in_same_pathgroup():
-        epnum_idx = node.node_idx
+        return [ leaf for leaf in mtree.unidentified_leaves()
                 if (leaf.node_idx[0] == epnum_idx[0] and
                     leaf.node_idx[1:] > epnum_idx[1:]) ]
-        # a few helper functions to be able to filter using high-level semantics
+    def after_epnum_in_same_explicitgroup():
-        def before_epnum_in_same_pathgroup():
+        return [ leaf for leaf in mtree.unidentified_leaves()
-            return [leaf for leaf in mtree.unidentified_leaves()
+                 if (leaf.node_idx[:2] == epnum_idx[:2] and
-                     if (leaf.node_idx[0] == epnum_idx[0] and
+                     leaf.node_idx[2:] > epnum_idx[2:]) ]
                         leaf.node_idx[1:] < epnum_idx[1:])]
-        def after_epnum_in_same_pathgroup():
+    # epnumber is the first group and there are only 2 after it in same
-            return [leaf for leaf in mtree.unidentified_leaves()
+    # path group
-                     if (leaf.node_idx[0] == epnum_idx[0] and
+    # -> series title - episode title
-                         leaf.node_idx[1:] > epnum_idx[1:])]
+    title_candidates = [ n for n in after_epnum_in_same_pathgroup()
                         if n.clean_value.lower() not in non_episode_title ]
    if ('title' not in mtree.info and                # no title
        before_epnum_in_same_pathgroup() == [] and   # no groups before
        len(title_candidates) == 2):                 # only 2 groups after
-        def after_epnum_in_same_explicitgroup():
+        found_property(title_candidates[0], 'series', confidence=0.4)
-            return [leaf for leaf in mtree.unidentified_leaves()
+        found_property(title_candidates[1], 'title', confidence=0.4)
-                     if (leaf.node_idx[:2] == epnum_idx[:2] and
+        return
                         leaf.node_idx[2:] > epnum_idx[2:])]
-        # epnumber is the first group and there are only 2 after it in same
+    # if we have at least 1 valid group before the episodeNumber, then it's
-        # path group
+    # probably the series name
-        # -> series title - episode title
+    series_candidates = before_epnum_in_same_pathgroup()
-        title_candidates = self._filter_candidates(after_epnum_in_same_pathgroup())
+    if len(series_candidates) >= 1:
        found_property(series_candidates[0], 'series', confidence=0.7)
-        if ('title' not in mtree.info and  # no title
+    # only 1 group after (in the same path group) and it's probably the
-            before_epnum_in_same_pathgroup() == [] and  # no groups before
+    # episode title
-            len(title_candidates) == 2):  # only 2 groups after
+    title_candidates = [ n for n in after_epnum_in_same_pathgroup()
-
+                         if n.clean_value.lower() not in non_episode_title ]
            found_property(title_candidates[0], 'series', confidence=0.4)
            found_property(title_candidates[1], 'title', confidence=0.4)
            return
        # if we have at least 1 valid group before the episodeNumber, then it's
        # probably the series name
        series_candidates = before_epnum_in_same_pathgroup()
        if len(series_candidates) >= 1:
            found_property(series_candidates[0], 'series', confidence=0.7)
        # only 1 group after (in the same path group) and it's probably the
        # episode title
        title_candidates = self._filter_candidates(after_epnum_in_same_pathgroup())
    if len(title_candidates) == 1:
        found_property(title_candidates[0], 'title', confidence=0.5)
        return
    else:
        # try in the same explicit group, with lower confidence
        title_candidates = [ n for n in after_epnum_in_same_explicitgroup()
                             if n.clean_value.lower() not in non_episode_title
                             ]
        if len(title_candidates) == 1:
-            found_property(title_candidates[0], 'title', confidence=0.5)
+            found_property(title_candidates[0], 'title', confidence=0.4)
            return
        elif len(title_candidates) > 1:
            found_property(title_candidates[0], 'title', confidence=0.3)
            return
        else:
            # try in the same explicit group, with lower confidence
            title_candidates = self._filter_candidates(after_epnum_in_same_explicitgroup())
            if len(title_candidates) == 1:
                found_property(title_candidates[0], 'title', confidence=0.4)
                return
            elif len(title_candidates) > 1:
                found_property(title_candidates[0], 'title', confidence=0.3)
                return
-        # get the one with the longest value
+    # get the one with the longest value
-        title_candidates = self._filter_candidates(after_epnum_in_same_pathgroup())
+    title_candidates = [ n for n in after_epnum_in_same_pathgroup()
-        if title_candidates:
+                         if n.clean_value.lower() not in non_episode_title ]
-            maxidx = -1
+    if title_candidates:
-            maxv = -1
+        maxidx = -1
-            for i, c in enumerate(title_candidates):
+        maxv = -1
-                if len(c.clean_value) > maxv:
+        for i, c in enumerate(title_candidates):
-                    maxidx = i
+            if len(c.clean_value) > maxv:
-                    maxv = len(c.clean_value)
+                maxidx = i
-            found_property(title_candidates[maxidx], 'title', confidence=0.3)
+                maxv = len(c.clean_value)
        found_property(title_candidates[maxidx], 'title', confidence=0.3)
    def should_process(self, mtree, options=None):
        """Tell whether this transformer has to run on mtree.

        Returns False as soon as options holds a truthy 'skip_title' entry.
        Otherwise returns whether the 'type' entry of ``mtree.guess`` (empty
        string when missing) starts with 'episode'.
        """
        if options and options.get('skip_title'):
            return False
        guessed_type = mtree.guess.get('type', '')
        return guessed_type.startswith('episode')
-    def _filter_candidates(self, candidates):
+def process(mtree):
-        episode_special_transformer = get_transformer('guess_episode_special')
+    eps = [node for node in mtree.leaves() if 'episodeNumber' in node.guess]
-        if episode_special_transformer:
+    if eps:
-            return [n for n in candidates if not episode_special_transformer.container.find_properties(n.value, n, re_match=True)]
+        match_from_epnum_position(mtree, eps[0])
        else:
            return candidates
-    def process(self, mtree, options=None):
+    else:
-        """
+        # if we don't have the episode number, but at least 2 groups in the
-        try to identify the remaining unknown groups by looking at their
+        # basename, then it's probably series - eptitle
-        position relative to other known elements
+        basename = mtree.node_at((-2,))
-        """
+        title_candidates = [ n for n in basename.unidentified_leaves()
-        eps = [node for node in mtree.leaves() if 'episodeNumber' in node.guess]
+                             if n.clean_value.lower() not in non_episode_title
-        if eps:
+                             ]
            self.match_from_epnum_position(mtree, eps[0])
-        else:
+        if len(title_candidates) >= 2:
-            # if we don't have the episode number, but at least 2 groups in the
+            found_property(title_candidates[0], 'series', 0.4)
-            # basename, then it's probably series - eptitle
+            found_property(title_candidates[1], 'title', 0.4)
-            basename = mtree.node_at((-2,))
+        elif len(title_candidates) == 1:
            # but if there's only one candidate, it's probably the series name
            found_property(title_candidates[0], 'series', 0.4)
-            title_candidates = self._filter_candidates(basename.unidentified_leaves())
+    # if we only have 1 remaining valid group in the folder containing the
    # file, then it's likely that it is the series name
    try:
        series_candidates = mtree.node_at((-3,)).unidentified_leaves()
    except ValueError:
        series_candidates = []
-            if len(title_candidates) >= 2:
+    if len(series_candidates) == 1:
-                found_property(title_candidates[0], 'series', confidence=0.4)
+        found_property(series_candidates[0], 'series', 0.3)
                found_property(title_candidates[1], 'title', confidence=0.4)
            elif len(title_candidates) == 1:
                # but if there's only one candidate, it's probably the series name
                found_property(title_candidates[0], 'series', confidence=0.4)
-        # if we only have 1 remaining valid group in the folder containing the
+    # if there's a path group that only contains the season info, then the
-        # file, then it's likely that it is the series name
+    # previous one is most likely the series title (ie: ../series/season X/..)
-        try:
+    eps = [ node for node in mtree.nodes()
-            series_candidates = mtree.node_at((-3,)).unidentified_leaves()
+            if 'season' in node.guess and 'episodeNumber' not in node.guess ]
        except ValueError:
            series_candidates = []
-        if len(series_candidates) == 1:
+    if eps:
-            found_property(series_candidates[0], 'series', confidence=0.3)
+        previous = [ node for node in mtree.unidentified_leaves()
                     if node.node_idx[0] == eps[0].node_idx[0] - 1 ]
        if len(previous) == 1:
            found_property(previous[0], 'series', 0.5)
-        # if there's a path group that only contains the season info, then the
+    # reduce the confidence of unlikely series
-        # previous one is most likely the series title (ie: ../series/season X/..)
+    for node in mtree.nodes():
-        eps = [node for node in mtree.nodes()
+        if 'series' in node.guess:
-               if 'season' in node.guess and 'episodeNumber' not in node.guess]
+            if node.guess['series'].lower() in unlikely_series:
-
+                new_confidence = node.guess.confidence('series') * 0.5
-        if eps:
+                node.guess.set_confidence('series', new_confidence)
            previous = [node for node in mtree.unidentified_leaves()
                        if node.node_idx[0] == eps[0].node_idx[0] - 1]
            if len(previous) == 1:
                found_property(previous[0], 'series', confidence=0.5)
    def post_process(self, mtree, options=None):
        """Rewrite every guessed series name through reorder_title.

        Nodes of mtree whose guess has no 'series' entry are left alone.
        options is accepted but not used.
        """
        for node in mtree.nodes():
            if 'series' in node.guess:
                node.guess['series'] = reorder_title(node.guess['series'])
--- a/lib/guessit/transfo/guess_episode_special.py
+++ b/lib/guessit/transfo/guess_episode_special.py
@ -1,62 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
 # Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
 # GuessIt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # Lesser GNU General Public License for more details.
 #
 # You should have received a copy of the Lesser GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 from __future__ import absolute_import, division, print_function, unicode_literals
 from guessit.plugins.transformers import Transformer
 from guessit.matcher import found_guess
 from guessit.containers import PropertiesContainer
 class GuessEpisodeSpecial(Transformer):
    """Transformer that flags leaves naming a special episode.

    The recognised words ('Special', 'Bonus', 'Omake', 'Ova', 'Oav', 'Pilot',
    'Unaired', 'Extras?') are registered under the 'special' property of a
    PropertiesContainer created in __init__.
    """

    def __init__(self):
        # NOTE(review): -205 is presumably this transformer's ordering
        # priority -- confirm against Transformer.__init__
        Transformer.__init__(self, -205)
        container = self.container = PropertiesContainer()
        container.register_property('special', 'Special', 'Bonus', 'Omake', 'Ova', 'Oav', 'Pilot', 'Unaired')
        # whatever 'Extras?' matches is reported under the form 'Extras'
        container.register_property('special', 'Extras?', canonical_form='Extras')

    def guess_special(self, string, node=None, options=None):
        """Return the 'special' guesses found in string (multiple allowed).

        options is accepted but not used.
        """
        matches = self.container.find_properties(string, node, 'special', multiple=True)
        return self.container.as_guess(matches, multiple=True)

    def second_pass_options(self, mtree, options=None):
        """Return {'type': 'episode'} when a second pass as episode is worth it.

        That is the case when the guessed type of mtree does not already
        start with 'episode' and at least one unidentified leaf yields a
        'special' guess. Returns None otherwise.
        """
        if mtree.guess.get('type', '').startswith('episode'):
            return None
        for leaf in mtree.unidentified_leaves():
            found = self.container.find_properties(leaf.value, leaf, 'special')
            if self.container.as_guess(found):
                return {'type': 'episode'}
        return None

    def supported_properties(self):
        """Return whatever properties the container reports as supported."""
        return self.container.get_supported_properties()

    def process(self, mtree, options=None):
        """Record 'special' guesses on title leaves and unidentified leaves.

        Only runs when the guessed type starts with 'episode' and either no
        episode number is known or the season is 0. Always returns None.
        """
        if not mtree.guess.get('type', '').startswith('episode'):
            return None
        if mtree.info.get('episodeNumber') and mtree.info.get('season') != 0:
            return None

        def tag(leaves):
            # attach every 'special' guess found in a leaf to that leaf
            for leaf in leaves:
                for guess in self.guess_special(leaf.value, leaf, options):
                    found_guess(leaf, guess, update_guess=False)

        # title leaves first; the unidentified leaves are only queried afterwards
        tag(mtree.leaves_containing('title'))
        tag(mtree.unidentified_leaves())
        return None
--- a/lib/guessit/transfo/guess_episodes_rexps.py
+++ b/lib/guessit/transfo/guess_episodes_rexps.py
@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@ -18,63 +18,49 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, unicode_literals
+from __future__ import unicode_literals
 from guessit import Guess
 from guessit.transfo import SingleNodeGuesser
 from guessit.patterns import episode_rexps
 import re
 import logging
-from guessit.plugins.transformers import Transformer
+log = logging.getLogger(__name__)
-from guessit.matcher import GuessFinder
+
-from guessit.patterns import sep
+def number_list(s):
-from guessit.containers import PropertiesContainer, WeakValidator, NoValidator
+    l = [ int(n) for n in re.sub('[^0-9]+', ' ', s).split() ]
-from guessit.patterns.numeral import numeral, digital_numeral, parse_numeral
+
-from re import split as re_split
+    if len(l) == 2:
        # it is an episode interval, return all numbers in between
        return range(l[0], l[1]+1)
    return l
 def guess_episodes_rexps(string):
    """Look for the first episode pattern matching string.

    The (rexp, confidence, span_adjust) entries of episode_rexps are tried in
    order, ignoring case. For the first one that matches, returns
    (guess, span): span is the extent of the match shifted by span_adjust,
    and guess is built from the named groups of the match. A truthy
    'episodeNumber' is run through number_list: its first number is kept as
    'episodeNumber' and, if there are several, the full list is stored as
    'episodeList'. A truthy 'bonusNumber' is reduced to its first number the
    same way. Returns (None, None) when no pattern matches.
    """
    for rexp, confidence, span_adjust in episode_rexps:
        match = re.search(rexp, string, re.IGNORECASE)
        if not match:
            continue

        start = match.start() + span_adjust[0]
        end = match.end() + span_adjust[1]
        raw = string[start:end]
        guess = Guess(match.groupdict(), confidence=confidence, raw=raw)

        # a single episode number, or a list of episodes?
        if guess.get('episodeNumber'):
            episodes = number_list(guess['episodeNumber'])
            guess.set('episodeNumber', episodes[0], confidence=confidence, raw=raw)
            if len(episodes) > 1:
                guess.set('episodeList', episodes, confidence=confidence, raw=raw)

        if guess.get('bonusNumber'):
            bonuses = number_list(guess['bonusNumber'])
            guess.set('bonusNumber', bonuses[0], confidence=confidence, raw=raw)

        return guess, (start, end)

    return None, None
-class GuessEpisodesRexps(Transformer):
+def process(mtree):
-    def __init__(self):
+    SingleNodeGuesser(guess_episodes_rexps, None, log).process(mtree)
        Transformer.__init__(self, 20)
        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)
        def episode_parser(value):
            values = re_split('[a-zA-Z]', value)
            values = [x for x in values if x]
            ret = []
            for letters_elt in values:
                dashed_values = letters_elt.split('-')
                dashed_values = [x for x in dashed_values if x]
                if len(dashed_values) > 1:
                    for _ in range(0, len(dashed_values) - 1):
                        start_dash_ep = parse_numeral(dashed_values[0])
                        end_dash_ep = parse_numeral(dashed_values[1])
                        for dash_ep in range(start_dash_ep, end_dash_ep + 1):
                            ret.append(dash_ep)
                else:
                    ret.append(parse_numeral(letters_elt))
            if len(ret) > 1:
                return {None: ret[0], 'episodeList': ret}  # TODO: Should support seasonList also
            elif len(ret) > 0:
                return ret[0]
            else:
                return None
        self.container.register_property(None, r'((?:season|saison)' + sep + '?(?P<season>' + numeral + '))', confidence=1.0, formatter=parse_numeral)
        self.container.register_property(None, r'(s(?P<season>' + digital_numeral + ')[^0-9]?' + sep + '?(?P<episodeNumber>(?:e' + digital_numeral + '(?:' + sep + '?[e-]' + digital_numeral + ')*)))[^0-9]', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser}, validator=NoValidator())
        self.container.register_property(None, r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<episodeNumber>(?:x' + digital_numeral + '(?:' + sep + '?[x-]' + digital_numeral + ')*)))[^0-9]', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser})
        self.container.register_property(None, r'(s(?P<season>' + digital_numeral + '))[^0-9]', confidence=0.6, formatter=parse_numeral, validator=NoValidator())
        self.container.register_property(None, r'((?P<episodeNumber>' + digital_numeral + ')v[23])', confidence=0.6, formatter=parse_numeral)
        self.container.register_property(None, r'((?:ep)' + sep + r'(?P<episodeNumber>' + numeral + '))[^0-9]', confidence=0.7, formatter=parse_numeral)
        self.container.register_property(None, r'(e(?P<episodeNumber>' + digital_numeral + '))', confidence=0.6, formatter=parse_numeral)
        self.container.register_canonical_properties('other', 'FiNAL', 'Complete', validator=WeakValidator())
    def supported_properties(self):
        """Return the property names reported as supported: 'episodeNumber' and 'season'."""
        return ['episodeNumber', 'season']
    def guess_episodes_rexps(self, string, node=None, options=None):
        """Return the guess built from the container's properties found in string.

        node is forwarded to the property lookup; options is accepted but
        not used.
        """
        return self.container.as_guess(self.container.find_properties(string, node), string)
    def should_process(self, mtree, options=None):
        """Return whether the guessed type of mtree starts with 'episode'.

        A missing 'type' entry counts as an empty string, hence False.
        options is accepted but not used.
        """
        guessed_type = mtree.guess.get('type', '')
        return guessed_type.startswith('episode')
    def process(self, mtree, options=None):
        """Apply self.guess_episodes_rexps to the unidentified leaves of mtree.

        A GuessFinder is built from self.guess_episodes_rexps, None
        (NOTE(review): presumably "no fixed confidence" -- confirm against
        GuessFinder), self.log and options; it is then handed
        ``mtree.unidentified_leaves()``.
        """
        finder = GuessFinder(self.guess_episodes_rexps, None, self.log, options)
        leaves = mtree.unidentified_leaves()
        finder.process_nodes(leaves)
--- a/lib/guessit/transfo/guess_filetype.py
+++ b/lib/guessit/transfo/guess_filetype.py
@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@ -18,196 +18,182 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, unicode_literals
+from __future__ import unicode_literals
-
+from guessit import Guess
-import mimetypes
+from guessit.patterns import (subtitle_exts, info_exts, video_exts, episode_rexps,
                              find_properties, compute_canonical_form)
 from guessit.date import valid_year
 from guessit.textutils import clean_string
 import os.path
 import re
 import mimetypes
 import logging
-from guessit.guess import Guess
+log = logging.getLogger(__name__)
 from guessit.patterns.extension import subtitle_exts, info_exts, video_exts
 from guessit.transfo import TransformerException
 from guessit.plugins.transformers import Transformer, get_transformer
 from guessit.matcher import log_found_guess, found_guess
 from guessit.textutils import clean_string
 # List of well known movies and series, hardcoded because they cannot be
 # guessed appropriately otherwise
 MOVIES = [ 'OSS 117' ]
 SERIES = [ 'Band of Brothers' ]
-class GuessFiletype(Transformer):
+MOVIES = [ m.lower() for m in MOVIES ]
-    def __init__(self):
+SERIES = [ s.lower() for s in SERIES ]
        Transformer.__init__(self, 250)
-    # List of well known movies and series, hardcoded because they cannot be
+def guess_filetype(mtree, filetype):
-    # guessed appropriately otherwise
+    # put the filetype inside a dummy container to be able to have the
-    MOVIES = ['OSS 117']
+    # following functions work correctly as closures
-    SERIES = ['Band of Brothers']
+    # this is a workaround for python 2 which doesn't have the
    # 'nonlocal' keyword (python 3 does have it)
    filetype_container = [filetype]
    other = {}
    filename = mtree.string
-    MOVIES = [m.lower() for m in MOVIES]
+    def upgrade_episode():
-    SERIES = [s.lower() for s in SERIES]
+        if filetype_container[0] == 'video':
            filetype_container[0] = 'episode'
        elif filetype_container[0] == 'subtitle':
            filetype_container[0] = 'episodesubtitle'
        elif filetype_container[0] == 'info':
            filetype_container[0] = 'episodeinfo'
-    def guess_filetype(self, mtree, options=None):
+    def upgrade_movie():
-        options = options or {}
+        if filetype_container[0] == 'video':
            filetype_container[0] = 'movie'
        elif filetype_container[0] == 'subtitle':
            filetype_container[0] = 'moviesubtitle'
        elif filetype_container[0] == 'info':
            filetype_container[0] = 'movieinfo'
-        # put the filetype inside a dummy container to be able to have the
+    def upgrade_subtitle():
-        # following functions work correctly as closures
+        if 'movie' in filetype_container[0]:
-        # this is a workaround for python 2 which doesn't have the
+            filetype_container[0] = 'moviesubtitle'
-        # 'nonlocal' keyword which we could use here in the upgrade_* functions
+        elif 'episode' in filetype_container[0]:
-        # (python 3 does have it)
+            filetype_container[0] = 'episodesubtitle'
        filetype_container = [mtree.guess.get('type')]
        other = {}
        filename = mtree.string
        def upgrade_episode():
            if filetype_container[0] == 'subtitle':
                filetype_container[0] = 'episodesubtitle'
            elif filetype_container[0] == 'info':
                filetype_container[0] = 'episodeinfo'
            elif not filetype_container[0]:
                filetype_container[0] = 'episode'
        def upgrade_movie():
            if filetype_container[0] == 'subtitle':
                filetype_container[0] = 'moviesubtitle'
            elif filetype_container[0] == 'info':
                filetype_container[0] = 'movieinfo'
            elif not filetype_container[0]:
                filetype_container[0] = 'movie'
        def upgrade_subtitle():
            if filetype_container[0] == 'movie':
                filetype_container[0] = 'moviesubtitle'
            elif filetype_container[0] == 'episode':
                filetype_container[0] = 'episodesubtitle'
            elif not filetype_container[0]:
                filetype_container[0] = 'subtitle'
        def upgrade_info():
            if filetype_container[0] == 'movie':
                filetype_container[0] = 'movieinfo'
            elif filetype_container[0] == 'episode':
                filetype_container[0] = 'episodeinfo'
            elif not filetype_container[0]:
                filetype_container[0] = 'info'
        # look at the extension first
        fileext = os.path.splitext(filename)[1][1:].lower()
        if fileext in subtitle_exts:
            upgrade_subtitle()
            other = {'container': fileext}
        elif fileext in info_exts:
            upgrade_info()
            other = {'container': fileext}
        elif fileext in video_exts:
            other = {'container': fileext}
        else:
-            if fileext and not options.get('name_only'):
+            filetype_container[0] = 'subtitle'
                other = {'extension': fileext}
-        # check whether we are in a 'Movies', 'Tv Shows', ... folder
+    def upgrade_info():
-        folder_rexps = [
+        if 'movie' in filetype_container[0]:
-                        (r'Movies?', upgrade_movie),
+            filetype_container[0] = 'movieinfo'
-                        (r'Films?', upgrade_movie),
+        elif 'episode' in filetype_container[0]:
-                        (r'Tv[ _-]?Shows?', upgrade_episode),
+            filetype_container[0] = 'episodeinfo'
-                        (r'Series?', upgrade_episode),
+        else:
-                        (r'Episodes?', upgrade_episode),
+            filetype_container[0] = 'info'
                        ]
        for frexp, upgrade_func in folder_rexps:
            frexp = re.compile(frexp, re.IGNORECASE)
            for pathgroup in mtree.children:
                if frexp.match(pathgroup.value):
                    upgrade_func()
                    return filetype_container[0], other
-        # check for a few specific cases which will unintentionally make the
+    def upgrade(type='unknown'):
-        # following heuristics confused (eg: OSS 117 will look like an episode,
+        if filetype_container[0] == 'autodetect':
-        # season 1, epnum 17, when it is in fact a movie)
+            filetype_container[0] = type
        fname = clean_string(filename).lower()
        for m in self.MOVIES:
            if m in fname:
                self.log.debug('Found in exception list of movies -> type = movie')
                upgrade_movie()
                return filetype_container[0], other
        for s in self.SERIES:
            if s in fname:
                self.log.debug('Found in exception list of series -> type = episode')
                upgrade_episode()
                return filetype_container[0], other
        # now look whether there are some specific hints for episode vs movie
        # if we have an episode_rexp (eg: s02e13), it is an episode
        episode_transformer = get_transformer('guess_episodes_rexps')
        if episode_transformer:
            guess = episode_transformer.guess_episodes_rexps(filename)
            if guess:
                self.log.debug('Found guess_episodes_rexps: %s -> type = episode', guess)
                upgrade_episode()
                return filetype_container[0], other
-        properties_transformer = get_transformer('guess_properties')
+    # look at the extension first
-        if properties_transformer:
+    fileext = os.path.splitext(filename)[1][1:].lower()
-            # if we have certain properties characteristic of episodes, it is an ep
+    if fileext in subtitle_exts:
-            found = properties_transformer.container.find_properties(filename, mtree, 'episodeFormat')
+        upgrade_subtitle()
-            guess = properties_transformer.container.as_guess(found, filename)
+        other = { 'container': fileext }
-            if guess:
+    elif fileext in info_exts:
-                self.log.debug('Found characteristic property of episodes: %s"', guess)
+        upgrade_info()
-                upgrade_episode()
+        other = { 'container': fileext }
-                return filetype_container[0], other
+    elif fileext in video_exts:
        upgrade(type='video')
        other = { 'container': fileext }
    else:
        upgrade(type='unknown')
        other = { 'extension': fileext }
            found = properties_transformer.container.find_properties(filename, mtree, 'format')
            guess = properties_transformer.container.as_guess(found, filename)
            if guess and guess['format'] in ('HDTV', 'WEBRip', 'WEB-DL', 'DVB'):
                # Use weak episodes only if TV or WEB source
                weak_episode_transformer = get_transformer('guess_weak_episodes_rexps')
                if weak_episode_transformer:
                    guess = weak_episode_transformer.guess_weak_episodes_rexps(filename)
                    if guess:
                        self.log.debug('Found guess_weak_episodes_rexps: %s -> type = episode', guess)
                        upgrade_episode()
                        return filetype_container[0], other
        website_transformer = get_transformer('guess_website')
        if website_transformer:
            found = website_transformer.container.find_properties(filename, mtree, 'website')
            guess = website_transformer.container.as_guess(found, filename)
            if guess:
                for namepart in ('tv', 'serie', 'episode'):
                    if namepart in guess['website']:
                        # origin-specific type
                        self.log.debug('Found characteristic property of episodes: %s', guess)
                        upgrade_episode()
                        return filetype_container[0], other
-        if filetype_container[0] in ('subtitle', 'info') or (not filetype_container[0] and fileext in video_exts):
+    # check whether we are in a 'Movies', 'Tv Shows', ... folder
-            # if no episode info found, assume it's a movie
+    folder_rexps = [ (r'Movies?', upgrade_movie),
-            self.log.debug('Nothing characteristic found, assuming type = movie')
+                     (r'Tv[ _-]?Shows?', upgrade_episode),
                     (r'Series', upgrade_episode)
                     ]
    for frexp, upgrade_func in folder_rexps:
        frexp = re.compile(frexp, re.IGNORECASE)
        for pathgroup in mtree.children:
            if frexp.match(pathgroup.value):
                upgrade_func()
    # check for a few specific cases which will unintentionally make the
    # following heuristics confused (eg: OSS 117 will look like an episode,
    # season 1, epnum 17, when it is in fact a movie)
    fname = clean_string(filename).lower()
    for m in MOVIES:
        if m in fname:
            log.debug('Found in exception list of movies -> type = movie')
            upgrade_movie()
    for s in SERIES:
        if s in fname:
            log.debug('Found in exception list of series -> type = episode')
            upgrade_episode()
-        if not filetype_container[0]:
+    # now look whether there are some specific hints for episode vs movie
-            self.log.debug('Nothing characteristic found, assuming type = unknown')
+    if filetype_container[0] in ('video', 'subtitle', 'info'):
-            filetype_container[0] = 'unknown'
+        # if we have an episode_rexp (eg: s02e13), it is an episode
        for rexp, _, _ in episode_rexps:
            match = re.search(rexp, filename, re.IGNORECASE)
            if match:
                log.debug('Found matching regexp: "%s" (string = "%s") -> type = episode', rexp, match.group())
                upgrade_episode()
                break
-        return filetype_container[0], other
+        # if we have a 3-4 digit number that's not a year, maybe an episode
        match = re.search(r'[^0-9]([0-9]{3,4})[^0-9]', filename)
        if match:
            fullnumber = int(match.group()[1:-1])
            #season = fullnumber // 100
            epnumber = fullnumber % 100
            possible = True
-    def process(self, mtree, options=None):
+            # check for validity
-        """guess the file type now (will be useful later)
+            if epnumber > 40:
-        """
+                possible = False
-        filetype, other = self.guess_filetype(mtree, options)
+            if valid_year(fullnumber):
                possible = False
-        mtree.guess.set('type', filetype, confidence=1.0)
+            if possible:
-        log_found_guess(mtree.guess)
+                log.debug('Found possible episode number: %s (from string "%s") -> type = episode', epnumber, match.group())
                upgrade_episode()
-        filetype_info = Guess(other, confidence=1.0)
+        # if we have certain properties characteristic of episodes, it is an ep
-        # guess the mimetype of the filename
+        for prop, value, _, _ in find_properties(filename):
-        # TODO: handle other mimetypes not found on the default type_maps
+            log.debug('prop: %s = %s' % (prop, value))
-        # mimetypes.types_map['.srt']='text/subtitle'
+            if prop == 'episodeFormat':
-        mime, _ = mimetypes.guess_type(mtree.string, strict=False)
+                log.debug('Found characteristic property of episodes: %s = "%s"', prop, value)
-        if mime is not None:
+                upgrade_episode()
-            filetype_info.update({'mimetype': mime}, confidence=1.0)
+                break
-        node_ext = mtree.node_at((-1,))
+            elif compute_canonical_form('format', value) == 'DVB':
-        found_guess(node_ext, filetype_info)
+                log.debug('Found characteristic property of episodes: %s = "%s"', prop, value)
                upgrade_episode()
                break
-        if mtree.guess.get('type') in [None, 'unknown']:
+        # origin-specific type
-            if options.get('name_only'):
+        if 'tvu.org.ru' in filename:
-                mtree.guess.set('type', 'movie', confidence=0.6)
+            log.debug('Found characteristic property of episodes: %s = "%s"', prop, value)
-            else:
+            upgrade_episode()
-                raise TransformerException(__name__, 'Unknown file type')
+
        # if no episode info found, assume it's a movie
        log.debug('Nothing characteristic found, assuming type = movie')
        upgrade_movie()
    filetype = filetype_container[0]
    return filetype, other
 def process(mtree, filetype='autodetect'):
    """Guess the file type of `mtree` and record the result on the tree.

    The type returned by guess_filetype() is stored under the 'type' key of
    the root guess with full confidence. The extra properties returned with
    it, plus the mimetype when one can be determined, become the guess of
    the node at index (-1,).

    :param mtree: match tree built from the filename being analysed
    :param filetype: file type hint, forwarded unchanged to guess_filetype()
    """
    # `other` is the dict of additional properties found by guess_filetype()
    filetype, other = guess_filetype(mtree, filetype)
    mtree.guess.set('type', filetype, confidence=1.0)
    log.debug('Found with confidence %.2f: %s' % (1.0, mtree.guess))
    filetype_info = Guess(other, confidence=1.0)
    # guess the mimetype of the filename
    # TODO: handle other mimetypes not found on the default type_maps
    # mimetypes.types_map['.srt']='text/subtitle'
    # strict=False also accepts the non-standard but commonly used types
    mime, _ = mimetypes.guess_type(mtree.string, strict=False)
    if mime is not None:
        filetype_info.update({'mimetype': mime}, confidence=1.0)
    # NOTE(review): node (-1,) is presumably the extension group -- confirm
    node_ext = mtree.node_at((-1,))
    # plain assignment: this replaces whatever guess the node already had
    node_ext.guess = filetype_info
    log.debug('Found with confidence %.2f: %s' % (1.0, node_ext.guess))
--- a/lib/guessit/transfo/guess_idnumber.py
+++ b/lib/guessit/transfo/guess_idnumber.py
@ -18,52 +18,54 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, unicode_literals
+from __future__ import unicode_literals
-
+from guessit.transfo import SingleNodeGuesser
-from guessit.plugins.transformers import Transformer
+from guessit.patterns import find_properties
 from guessit.matcher import GuessFinder
 import re
 import logging
 log = logging.getLogger(__name__)
-class GuessIdnumber(Transformer):
+def guess_properties(string):
-    def __init__(self):
+    try:
-        Transformer.__init__(self, -180)
+        prop, value, pos, end = find_properties(string)[0]
-
+        return { prop: value }, (pos, end)
-    def supported_properties(self):
+    except IndexError:
        return ['idNumber']
    _idnum = re.compile(r'(?P<idNumber>[a-zA-Z0-9-]{20,})')  # 1.0, (0, 0))
    def guess_idnumber(self, string, node=None, options=None):
        match = self._idnum.search(string)
        if match is not None:
            result = match.groupdict()
            switch_count = 0
            DIGIT = 0
            LETTER = 1
            OTHER = 2
            last = LETTER
            for c in result['idNumber']:
                if c in '0123456789':
                    ci = DIGIT
                elif c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
                    ci = LETTER
                else:
                    ci = OTHER
                if ci != last:
                    switch_count += 1
                last = ci
            switch_ratio = float(switch_count) / len(result['idNumber'])
            # only return the result as probable if we alternate often between
            # char type (more likely for hash values than for common words)
            if switch_ratio > 0.4:
                return result, match.span()
        return None, None
-    def process(self, mtree, options=None):
+_idnum = re.compile(r'(?P<idNumber>[a-zA-Z0-9-]{10,})') # 1.0, (0, 0))
-        GuessFinder(self.guess_idnumber, 0.4, self.log, options).process_nodes(mtree.unidentified_leaves())
+
 def guess_idnumber(string):
    """Look for something that resembles an id/hash value in `string`.

    The first match of the module-level `_idnum` pattern is taken as the
    candidate. It is only accepted when its characters switch often enough
    between digits, letters and anything else.

    :param string: text to search
    :return: `(groupdict, span)` of the match when it is accepted,
        `(None, None)` otherwise
    """
    match = _idnum.search(string)
    if match is not None:
        result = match.groupdict()
        switch_count = 0
        # character classes used to detect a change of "kind" between
        # two consecutive characters
        DIGIT = 0
        LETTER = 1
        OTHER = 2
        # start as if a letter came first: a candidate beginning with a
        # digit (or any other non-letter) already counts as one switch
        last = LETTER
        for c in result['idNumber']:
            if c in '0123456789':
                ci = DIGIT
            elif c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
                ci = LETTER
            else:
                ci = OTHER
            if ci != last:
                switch_count += 1
            last = ci
        # float() keeps this a true division whatever the division mode
        switch_ratio = float(switch_count) / len(result['idNumber'])
        # only return the result as probable if we alternate often between
        # char type (more likely for hash values than for common words)
        if switch_ratio > 0.4:
            return result, match.span()
    # no candidate, or a candidate that looks too much like a regular word
    return None, None
 def process(mtree):
    """Run guess_idnumber() over `mtree` through a SingleNodeGuesser.

    NOTE(review): the 0.4 passed next to the guesser is presumably the
    confidence assigned to each match -- confirm against SingleNodeGuesser.
    """
    SingleNodeGuesser(guess_idnumber, 0.4, log).process(mtree)
--- a/lib/guessit/transfo/guess_language.py
+++ b/lib/guessit/transfo/guess_language.py
@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@ -18,152 +18,38 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, unicode_literals
+from __future__ import unicode_literals
 from guessit import Guess
 from guessit.transfo import SingleNodeGuesser
 from guessit.language import search_language
 import logging
-from guessit.language import search_language, subtitle_prefixes, subtitle_suffixes
+log = logging.getLogger(__name__)
 from guessit.patterns.extension import subtitle_exts
 from guessit.textutils import clean_string, find_words
 from guessit.plugins.transformers import Transformer
 from guessit.matcher import GuessFinder
-class GuessLanguage(Transformer):
+def guess_language(string, node, skip=None):
-    def __init__(self):
+    if skip:
-        Transformer.__init__(self, 30)
+        relative_skip = []
        for entry in skip:
            node_idx = entry['node_idx']
            span = entry['span']
            if node_idx == node.node_idx[:len(node_idx)]:
                relative_span = (span[0] - node.offset + 1, span[1] - node.offset + 1)
                relative_skip.append(relative_span)
        skip = relative_skip
-    def supported_properties(self):
+    language, span, confidence = search_language(string, skip=skip)
-        return ['language', 'subtitleLanguage']
+    if language:
        return (Guess({'language': language},
                      confidence=confidence,
                      raw= string[span[0]:span[1]]),
                span)
-    def guess_language(self, string, node=None, options=None):
+    return None, None
        guess = search_language(string)
        return guess
-    def _skip_language_on_second_pass(self, mtree, node):
+guess_language.use_node = True
        """Check if found node is a valid language node, or if it's a false positive.
        :param mtree: Tree detected on first pass.
        :type mtree: :class:`guessit.matchtree.MatchTree`
        :param node: Node that contains a language Guess
        :type node: :class:`guessit.matchtree.MatchTree`
-        :return: True if a second pass skipping this node is required
+def process(mtree, *args, **kwargs):
-        :rtype: bool
+    SingleNodeGuesser(guess_language, None, log, *args, **kwargs).process(mtree)
-        """
+    # Note: 'language' is promoted to 'subtitleLanguage' in the post_process transfo
        unidentified_starts = {}
        unidentified_ends = {}
        property_starts = {}
        property_ends = {}
        title_starts = {}
        title_ends = {}
        for unidentified_node in mtree.unidentified_leaves():
            unidentified_starts[unidentified_node.span[0]] = unidentified_node
            unidentified_ends[unidentified_node.span[1]] = unidentified_node
        for property_node in mtree.leaves_containing('year'):
            property_starts[property_node.span[0]] = property_node
            property_ends[property_node.span[1]] = property_node
        for title_node in mtree.leaves_containing(['title', 'series']):
            title_starts[title_node.span[0]] = title_node
            title_ends[title_node.span[1]] = title_node
        return node.span[0] in title_ends.keys() and (node.span[1] in unidentified_starts.keys() or node.span[1] + 1 in property_starts.keys()) or\
                node.span[1] in title_starts.keys() and (node.span[0] == 0 or node.span[0] in unidentified_ends.keys() or node.span[0] in property_ends.keys())
    def second_pass_options(self, mtree, options=None):
        m = mtree.matched()
        to_skip_language_nodes = []
        for lang_key in ('language', 'subtitleLanguage'):
            langs = {}
            lang_nodes = set(n for n in mtree.leaves_containing(lang_key))
            for lang_node in lang_nodes:
                lang = lang_node.guess.get(lang_key, None)
                if self._skip_language_on_second_pass(mtree, lang_node):
                    # Language probably split the title. Add to skip for 2nd pass.
                    # if filetype is subtitle and the language appears last, just before
                    # the extension, then it is likely a subtitle language
                    parts = clean_string(lang_node.root.value).split()
                    if (m.get('type') in ['moviesubtitle', 'episodesubtitle'] and
                        (parts.index(lang_node.value) == len(parts) - 2)):
                        continue
                    to_skip_language_nodes.append(lang_node)
                elif not lang in langs:
                    langs[lang] = lang_node
                else:
                    # The same language was found. Keep the more confident one,
                    # and add others to skip for 2nd pass.
                    existing_lang_node = langs[lang]
                    to_skip = None
                    if (existing_lang_node.guess.confidence('language') >=
                        lang_node.guess.confidence('language')):
                        # lang_node is to remove
                        to_skip = lang_node
                    else:
                        # existing_lang_node is to remove
                        langs[lang] = lang_node
                        to_skip = existing_lang_node
                    to_skip_language_nodes.append(to_skip)
        if to_skip_language_nodes:
            return {'skip_nodes': to_skip_language_nodes}
        return None
    def should_process(self, mtree, options=None):
        options = options or {}
        return 'nolanguage' not in options
    def process(self, mtree, options=None):
        GuessFinder(self.guess_language, None, self.log, options).process_nodes(mtree.unidentified_leaves())
    def promote_subtitle(self, node):
        node.guess.set('subtitleLanguage', node.guess['language'],
                       confidence=node.guess.confidence('language'))
        del node.guess['language']
    def post_process(self, mtree, options=None):
        # 1- try to promote language to subtitle language where it makes sense
        for node in mtree.nodes():
            if 'language' not in node.guess:
                continue
            # - if we matched a language in a file with a sub extension and that
            #   the group is the last group of the filename, it is probably the
            #   language of the subtitle
            #   (eg: 'xxx.english.srt')
            if (mtree.node_at((-1,)).value.lower() in subtitle_exts and
                node == mtree.leaves()[-2]):
                self.promote_subtitle(node)
            # - if we find in the same explicit group
            # a subtitle prefix before the language,
            # or a subtitle suffix after the language,
            # then upgrade the language
            explicit_group = mtree.node_at(node.node_idx[:2])
            group_str = explicit_group.value.lower()
            for sub_prefix in subtitle_prefixes:
                if (sub_prefix in find_words(group_str) and
                    0 <= group_str.find(sub_prefix) < (node.span[0] - explicit_group.span[0])):
                    self.promote_subtitle(node)
            for sub_suffix in subtitle_suffixes:
                if (sub_suffix in find_words(group_str) and
                    (node.span[0] - explicit_group.span[0]) < group_str.find(sub_suffix)):
                    self.promote_subtitle(node)
            # - if a language is in an explicit group just preceded by "st",
            #   it is a subtitle language (eg: '...st[fr-eng]...')
            try:
                idx = node.node_idx
                previous = mtree.node_at((idx[0], idx[1] - 1)).leaves()[-1]
                if previous.value.lower()[-2:] == 'st':
                    self.promote_subtitle(node)
            except IndexError:
                pass
--- a/lib/guessit/transfo/guess_movie_title_from_position.py
+++ b/lib/guessit/transfo/guess_movie_title_from_position.py
@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@ -18,160 +18,157 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, unicode_literals
+from __future__ import unicode_literals
 from guessit import Guess
 import unicodedata
 import logging
-from guessit.plugins.transformers import Transformer
+log = logging.getLogger(__name__)
 from guessit.matcher import found_property
 from guessit import u
-class GuessMovieTitleFromPosition(Transformer):
+def process(mtree):
-    def __init__(self):
+    def found_property(node, name, value, confidence):
-        Transformer.__init__(self, -200)
+        node.guess = Guess({ name: value },
                           confidence=confidence,
                           raw=value)
        log.debug('Found with confidence %.2f: %s' % (confidence, node.guess))
-    def supported_properties(self):
+    def found_title(node, confidence):
-        return ['title']
+        found_property(node, 'title', node.clean_value, confidence)
-    def should_process(self, mtree, options=None):
+    basename = mtree.node_at((-2,))
-        options = options or {}
+    all_valid = lambda leaf: len(leaf.clean_value) > 0
-        return not options.get('skip_title') and not mtree.guess.get('type', '').startswith('episode')
+    basename_leftover = basename.unidentified_leaves(valid=all_valid)
-    def process(self, mtree, options=None):
+    try:
-        """
+        folder = mtree.node_at((-3,))
-        try to identify the remaining unknown groups by looking at their
+        folder_leftover = folder.unidentified_leaves()
-        position relative to other known elements
+    except ValueError:
-        """
+        folder = None
-        basename = mtree.node_at((-2,))
+        folder_leftover = []
        all_valid = lambda leaf: len(leaf.clean_value) > 0
        basename_leftover = basename.unidentified_leaves(valid=all_valid)
-        try:
+    log.debug('folder: %s' % folder_leftover)
    log.debug('basename: %s' % basename_leftover)
    # specific cases:
    # if we find the same group both in the folder name and the filename,
    # it's a good candidate for title
    if (folder_leftover and basename_leftover and
        folder_leftover[0].clean_value == basename_leftover[0].clean_value):
        found_title(folder_leftover[0], confidence=0.8)
        return
    # specific cases:
    # if the basename contains a number first followed by an unidentified
    # group, and the folder only contains 1 unidentified one, then we have
    # a series
    # ex: Millenium Trilogy (2009)/(1)The Girl With The Dragon Tattoo(2009).mkv
    try:
        series = folder_leftover[0]
        filmNumber = basename_leftover[0]
        title = basename_leftover[1]
        basename_leaves = basename.leaves()
        num = int(filmNumber.clean_value)
        log.debug('series: %s' % series.clean_value)
        log.debug('title: %s' % title.clean_value)
        if (series.clean_value != title.clean_value and
            series.clean_value != filmNumber.clean_value and
            basename_leaves.index(filmNumber) == 0 and
            basename_leaves.index(title) == 1):
            found_title(title, confidence=0.6)
            found_property(series, 'filmSeries',
                           series.clean_value, confidence=0.6)
            found_property(filmNumber, 'filmNumber',
                           num, confidence=0.6)
        return
    except Exception:
        pass
    # specific cases:
    #  - movies/tttttt (yyyy)/tttttt.ccc
    try:
        if mtree.node_at((-4, 0)).value.lower() == 'movies':
            folder = mtree.node_at((-3,))
            folder_leftover = folder.unidentified_leaves()
        except ValueError:
            folder = None
            folder_leftover = []
-        self.log.debug('folder: %s' % u(folder_leftover))
+            # Note:too generic, might solve all the unittests as they all
-        self.log.debug('basename: %s' % u(basename_leftover))
+            # contain 'movies' in their path
            #
            #if containing_folder.is_leaf() and not containing_folder.guess:
            #    containing_folder.guess =
            #        Guess({ 'title': clean_string(containing_folder.value) },
            #              confidence=0.7)
-        # specific cases:
+            year_group = folder.first_leaf_containing('year')
-        # if we find the same group both in the folder name and the filename,
+            groups_before = folder.previous_unidentified_leaves(year_group)
        # it's a good candidate for title
        if (folder_leftover and basename_leftover and
            folder_leftover[0].clean_value == basename_leftover[0].clean_value):
-            found_property(folder_leftover[0], 'title', confidence=0.8)
+            found_title(groups_before[0], confidence=0.8)
            return
-        # specific cases:
+    except Exception:
-        # if the basename contains a number first followed by an unidentified
+        pass
        # group, and the folder only contains 1 unidentified one, then we have
        # a series
        # ex: Millenium Trilogy (2009)/(1)The Girl With The Dragon Tattoo(2009).mkv
        try:
            series = folder_leftover[0]
            filmNumber = basename_leftover[0]
            title = basename_leftover[1]
-            basename_leaves = basename.leaves()
+    # if we have either format or videoCodec in the folder containing the file
    # or one of its parents, then we should probably look for the title in
    # there rather than in the basename
    try:
        props = mtree.previous_leaves_containing(mtree.children[-2],
                                                 [ 'videoCodec', 'format',
                                                   'language' ])
    except IndexError:
        props = []
-            num = int(filmNumber.clean_value)
+    if props:
        group_idx = props[0].node_idx[0]
        if all(g.node_idx[0] == group_idx for g in props):
            # if they're all in the same group, take leftover info from there
            leftover = mtree.node_at((group_idx,)).unidentified_leaves()
-            self.log.debug('series: %s' % series.clean_value)
+            if leftover:
-            self.log.debug('title: %s' % title.clean_value)
+                found_title(leftover[0], confidence=0.7)
            if (series.clean_value != title.clean_value and
                series.clean_value != filmNumber.clean_value and
                basename_leaves.index(filmNumber) == 0 and
                basename_leaves.index(title) == 1):
                found_property(title, 'title', confidence=0.6)
                found_property(series, 'filmSeries', confidence=0.6)
                found_property(filmNumber, 'filmNumber', num, confidence=0.6)
            return
        except Exception:
            pass
        # specific cases:
        #  - movies/tttttt (yyyy)/tttttt.ccc
        try:
            if mtree.node_at((-4, 0)).value.lower() == 'movies':
                folder = mtree.node_at((-3,))
                # Note:too generic, might solve all the unittests as they all
                # contain 'movies' in their path
                #
                # if containing_folder.is_leaf() and not containing_folder.guess:
                #    containing_folder.guess =
                #        Guess({ 'title': clean_string(containing_folder.value) },
                #              confidence=0.7)
                year_group = folder.first_leaf_containing('year')
                groups_before = folder.previous_unidentified_leaves(year_group)
                found_property(groups_before[0], 'title', confidence=0.8)
                return
-        except Exception:
+    # look for title in basename if there are some remaining undidentified
-            pass
+    # groups there
    if basename_leftover:
        title_candidate = basename_leftover[0]
-        # if we have either format or videoCodec in the folder containing the file
+        # if basename is only one word and the containing folder has at least
-        # or one of its parents, then we should probably look for the title in
+        # 3 words in it, we should take the title from the folder name
-        # there rather than in the basename
+        # ex: Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi
-        try:
+        # ex: Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi  <-- TODO: gets caught here?
-            props = mtree.previous_leaves_containing(mtree.children[-2],
+        if (title_candidate.clean_value.count(' ') == 0 and
-                                                     ['videoCodec', 'format',
+            folder_leftover and
-                                                       'language'])
+            folder_leftover[0].clean_value.count(' ') >= 2):
        except IndexError:
            props = []
-        if props:
+            found_title(folder_leftover[0], confidence=0.7)
            group_idx = props[0].node_idx[0]
            if all(g.node_idx[0] == group_idx for g in props):
                # if they're all in the same group, take leftover info from there
                leftover = mtree.node_at((group_idx,)).unidentified_leaves()
                if leftover:
                    found_property(leftover[0], 'title', confidence=0.7)
                    return
        # look for title in basename if there are some remaining unidentified
        # groups there
        if basename_leftover:
            # if basename is only one word and the containing folder has at least
            # 3 words in it, we should take the title from the folder name
            # ex: Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi
            # ex: Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi  <-- TODO: gets caught here?
            if (basename_leftover[0].clean_value.count(' ') == 0 and
                folder_leftover and
                folder_leftover[0].clean_value.count(' ') >= 2):
                found_property(folder_leftover[0], 'title', confidence=0.7)
                return
            # if there are only many unidentified groups, take the first of which is
            # not inside brackets or parentheses.
            # ex: Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi
            if basename_leftover[0].is_explicit():
                for basename_leftover_elt in basename_leftover:
                    if not basename_leftover_elt.is_explicit():
                        found_property(basename_leftover_elt, 'title', confidence=0.8)
                        return
            # if all else fails, take the first remaining unidentified group in the
            # basename as title
            found_property(basename_leftover[0], 'title', confidence=0.6)
            return
-        # if there are no leftover groups in the basename, look in the folder name
+        # if there are only 2 unidentified groups, the first of which is inside
-        if folder_leftover:
+        # brackets or parentheses, we take the second one for the title:
-            found_property(folder_leftover[0], 'title', confidence=0.5)
+        # ex: Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi
        if len(basename_leftover) == 2 and basename_leftover[0].is_explicit():
            found_title(basename_leftover[1], confidence=0.8)
            return
-        # if nothing worked, look if we have a very small group at the beginning
+        # if all else fails, take the first remaining unidentified group in the
-        # of the basename
+        # basename as title
-        basename = mtree.node_at((-2,))
+        found_title(title_candidate, confidence=0.6)
-        basename_leftover = basename.unidentified_leaves(valid=lambda leaf: True)
+        return
-        if basename_leftover:
+
-            found_property(basename_leftover[0], 'title', confidence=0.4)
+    # if there are no leftover groups in the basename, look in the folder name
-            return
+    if folder_leftover:
        found_title(folder_leftover[0], confidence=0.5)
        return
    # if nothing worked, look if we have a very small group at the beginning
    # of the basename
    basename = mtree.node_at((-2,))
    basename_leftover = basename.unidentified_leaves(valid=lambda leaf: True)
    if basename_leftover:
        found_title(basename_leftover[0], confidence=0.4)
        return
--- a/lib/guessit/transfo/guess_properties.py
+++ b/lib/guessit/transfo/guess_properties.py
@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
+# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@ -18,213 +18,21 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, unicode_literals
+from __future__ import unicode_literals
 from guessit.transfo import SingleNodeGuesser
 from guessit.patterns import find_properties
 import logging
-from guessit.containers import PropertiesContainer, WeakValidator, LeavesValidator, QualitiesContainer
+log = logging.getLogger(__name__)
 from guessit.patterns.extension import subtitle_exts, video_exts, info_exts
 from guessit.plugins.transformers import Transformer
 from guessit.matcher import GuessFinder
-class GuessProperties(Transformer):
+def guess_properties(string):
-    def __init__(self):
+    try:
-        Transformer.__init__(self, 35)
+        prop, value, pos, end = find_properties(string)[0]
        return { prop: value }, (pos, end)
    except IndexError:
        return None, None
        self.container = PropertiesContainer()
        self.qualities = QualitiesContainer()
-        def register_property(propname, props):
+def process(mtree):
-            """props a dict of {value: [patterns]}"""
+    SingleNodeGuesser(guess_properties, 1.0, log).process(mtree)
            for canonical_form, patterns in props.items():
                if isinstance(patterns, tuple):
                    patterns2, kwargs = patterns
                    kwargs = dict(kwargs)
                    kwargs['canonical_form'] = canonical_form
                    self.container.register_property(propname, *patterns2, **kwargs)
                else:
                    self.container.register_property(propname, *patterns, canonical_form=canonical_form)
        def register_quality(propname, quality_dict):
            """props a dict of {canonical_form: quality}"""
            for canonical_form, quality in quality_dict.items():
                self.qualities.register_quality(propname, canonical_form, quality)
        register_property('container', {'mp4': ['MP4']})
        # http://en.wikipedia.org/wiki/Pirated_movie_release_types
        register_property('format', {'VHS': ['VHS'],
                                     'Cam': ['CAM', 'CAMRip'],
                                     'Telesync': ['TELESYNC', 'PDVD'],
                                     'Telesync': (['TS'], {'confidence': 0.2}),
                                     'Workprint': ['WORKPRINT', 'WP'],
                                     'Telecine': ['TELECINE', 'TC'],
                                     'PPV': ['PPV', 'PPV-Rip'],  # Pay Per View
                                     'TV': ['SD-TV', 'SD-TV-Rip', 'Rip-SD-TV', 'TV-Rip', 'Rip-TV'],
                                     'DVB': ['DVB-Rip', 'DVB', 'PD-TV'],
                                     'DVD': ['DVD', 'DVD-Rip', 'VIDEO-TS'],
                                     'HDTV': ['HD-TV', 'TV-RIP-HD', 'HD-TV-RIP'],
                                     'VOD': ['VOD', 'VOD-Rip'],
                                     'WEBRip': ['WEB-Rip'],
                                     'WEB-DL': ['WEB-DL'],
                                     'HD-DVD': ['HD-(?:DVD)?-Rip', 'HD-DVD'],
                                     'BluRay': ['Blu-ray', 'B[DR]', 'B[DR]-Rip', 'BD[59]', 'BD25', 'BD50']
                                     })
        register_quality('format', {'VHS': -100,
                                    'Cam': -90,
                                    'Telesync': -80,
                                    'Workprint': -70,
                                    'Telecine': -60,
                                    'PPV': -50,
                                    'TV': -30,
                                    'DVB': -20,
                                    'DVD': 0,
                                    'HDTV': 20,
                                    'VOD': 40,
                                    'WEBRip': 50,
                                    'WEB-DL': 60,
                                    'HD-DVD': 80,
                                    'BluRay': 100
                                    })
        register_property('screenSize', {'360p': ['(?:\d{3,}(?:\\|\/|x|\*))?360(?:i|p?x?)'],
                                         '368p': ['(?:\d{3,}(?:\\|\/|x|\*))?368(?:i|p?x?)'],
                                         '480p': ['(?:\d{3,}(?:\\|\/|x|\*))?480(?:i|p?x?)'],
                                         '480p': (['hr'], {'confidence': 0.2}),
                                         '576p': ['(?:\d{3,}(?:\\|\/|x|\*))?576(?:i|p?x?)'],
                                         '720p': ['(?:\d{3,}(?:\\|\/|x|\*))?720(?:i|p?x?)'],
                                         '900p': ['(?:\d{3,}(?:\\|\/|x|\*))?900(?:i|p?x?)'],
                                         '1080i': ['(?:\d{3,}(?:\\|\/|x|\*))?1080i'],
                                         '1080p': ['(?:\d{3,}(?:\\|\/|x|\*))?1080(?:p?x?)'],
                                         '4K': ['(?:\d{3,}(?:\\|\/|x|\*))?2160(?:i|p?x?)']
                                         })
        register_quality('screenSize', {'360p': -300,
                                        '368p': -200,
                                        '480p': -100,
                                        '576p': 0,
                                        '720p': 100,
                                        '900p': 130,
                                        '1080i': 180,
                                        '1080p': 200,
                                        '4K': 400
                                        })
        _videoCodecProperty = {'Real': ['Rv\d{2}'],  # http://en.wikipedia.org/wiki/RealVideo
                               'Mpeg2': ['Mpeg2'],
                               'DivX': ['DVDivX', 'DivX'],
                               'XviD': ['XviD'],
                               'h264': ['[hx]-264(?:-AVC)?', 'MPEG-4(?:-AVC)'],
                               'h265': ['[hx]-265(?:-HEVC)?', 'HEVC']
                               }
        register_property('videoCodec', _videoCodecProperty)
        register_quality('videoCodec', {'Real': -50,
                                        'Mpeg2': -30,
                                        'DivX': -10,
                                        'XviD': 0,
                                        'h264': 100,
                                        'h265': 150
                                        })
        # http://blog.mediacoderhq.com/h264-profiles-and-levels/
        # http://fr.wikipedia.org/wiki/H.264
        self.container.register_property('videoProfile', 'BP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
        self.container.register_property('videoProfile', 'XP', 'EP', canonical_form='XP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
        self.container.register_property('videoProfile', 'MP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
        self.container.register_property('videoProfile', 'HP', 'HiP', canonical_form='HP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
        self.container.register_property('videoProfile', '10.?bit', 'Hi10P', canonical_form='10bit', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
        self.container.register_property('videoProfile', 'Hi422P', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
        self.container.register_property('videoProfile', 'Hi444PP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
        register_quality('videoProfile', {'BP': -20,
                                          'XP': -10,
                                          'MP': 0,
                                          'HP': 10,
                                          '10bit': 15,
                                          'Hi422P': 25,
                                          'Hi444PP': 35
                                          })
        # has nothing to do here (or on filenames for that matter), but some
        # releases use it and it helps to identify release groups, so we adapt
        register_property('videoApi', {'DXVA': ['DXVA']})
        register_property('audioCodec', {'MP3': ['MP3'],
                                         'DolbyDigital': ['DD'],
                                         'AAC': ['AAC'],
                                         'AC3': ['AC3'],
                                         'Flac': ['FLAC'],
                                         'DTS': ['DTS'],
                                         'TrueHD': ['True-HD']
                                         })
        register_quality('audioCodec', {'MP3': 10,
                                        'DolbyDigital': 30,
                                        'AAC': 35,
                                        'AC3': 40,
                                        'Flac': 45,
                                        'DTS': 60,
                                        'TrueHD': 70
                                        })
        self.container.register_property('audioProfile', 'HD', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'DTS']))
        self.container.register_property('audioProfile', 'HD-MA', canonical_form='HDMA', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'DTS']))
        self.container.register_property('audioProfile', 'HE', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'AAC']))
        self.container.register_property('audioProfile', 'LC', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'AAC']))
        self.container.register_property('audioProfile', 'HQ', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'AC3']))
        register_quality('audioProfile', {'HD': 20,
                                          'HDMA': 50,
                                          'LC': 0,
                                          'HQ': 0,
                                          'HE': 20
                                          })
        register_property('audioChannels', {'7.1': ['7[\W_]1', '7ch'],
                                            '5.1': ['5[\W_]1', '5ch'],
                                            '2.0': ['2[\W_]0', '2ch', 'stereo'],
                                            '1.0': ['1[\W_]0', '1ch', 'mono']
                                            })
        register_quality('audioChannels', {'7.1': 200,
                                           '5.1': 100,
                                           '2.0': 0,
                                           '1.0': -100
                                           })
        self.container.register_property('episodeFormat', r'Minisodes?', canonical_form='Minisode')
        register_property('other', {'AudioFix': ['Audio-Fix', 'Audio-Fixed'],
                                    'SyncFix': ['Sync-Fix', 'Sync-Fixed'],
                                    'DualAudio': ['Dual-Audio'],
                                    'WideScreen': ['ws', 'wide-screen'],
                                    })
        self.container.register_property('other', 'Real', 'Fix', canonical_form="Proper", validator=WeakValidator())
        self.container.register_property('other', 'Proper', 'Repack', 'Rerip', canonical_form="Proper")
        self.container.register_canonical_properties('other', 'R5', 'Screener', '3D', 'HD', 'HQ', 'DDC')
        self.container.register_canonical_properties('other', 'Limited', 'Complete', 'Classic', 'Unrated', 'LiNE', 'Bonus', 'Trailer', validator=WeakValidator())
        for prop in self.container.get_properties('format'):
            self.container.register_property('other', prop.pattern + '(-?Scr(?:eener)?)', canonical_form='Screener')
        for exts in (subtitle_exts, info_exts, video_exts):
            for container in exts:
                self.container.register_property('container', container, confidence=0.3)
    def guess_properties(self, string, node=None, options=None):
        found = self.container.find_properties(string, node)
        return self.container.as_guess(found, string)
    def supported_properties(self):
        return self.container.get_supported_properties()
    def process(self, mtree, options=None):
        GuessFinder(self.guess_properties, 1.0, self.log, options).process_nodes(mtree.unidentified_leaves())
    def rate_quality(self, guess, *props):
        return self.qualities.rate_quality(guess, *props)
--- a/lib/guessit/transfo/guess_release_group.py
+++ b/lib/guessit/transfo/guess_release_group.py
@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@ -18,132 +18,69 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, unicode_literals
+from __future__ import unicode_literals
 from guessit.transfo import SingleNodeGuesser
 from guessit.patterns import prop_multi, compute_canonical_form, _dash, _psep
 import re
 import logging
-from guessit.plugins.transformers import Transformer
+log = logging.getLogger(__name__)
-from guessit.matcher import GuessFinder, found_property, found_guess
+
-from guessit.containers import PropertiesContainer
+def get_patterns(property_name):
-from guessit.patterns import sep
+    return [ p.replace(_dash, _psep) for patterns in prop_multi[property_name].values() for p in patterns  ]
-from guessit.guess import Guess
+
-from guessit.textutils import strip_brackets
+CODECS = get_patterns('videoCodec')
 FORMATS = get_patterns('format')
 VAPIS = get_patterns('videoApi')
 # RG names following a codec or format, with a potential space or dash inside the name
 GROUP_NAMES = [ r'(?P<videoCodec>' + codec + r')[ \.-](?P<releaseGroup>.+?([- \.].*?)??)[ \.]'
                for codec in CODECS ]
 GROUP_NAMES += [ r'(?P<format>'    + fmt   + r')[ \.-](?P<releaseGroup>.+?([- \.].*?)??)[ \.]'
                 for fmt in FORMATS ]
 GROUP_NAMES += [ r'(?P<videoApi>'  + api   + r')[ \.-](?P<releaseGroup>.+?([- \.].*?)??)[ \.]'
                 for api in VAPIS ]
 GROUP_NAMES2 = [ r'\.(?P<videoCodec>' + codec + r')-(?P<releaseGroup>.*?)(-(.*?))?[ \.]'
                 for codec in CODECS ]
 GROUP_NAMES2 += [ r'\.(?P<format>'    + fmt   + r')-(?P<releaseGroup>.*?)(-(.*?))?[ \.]'
                  for fmt in FORMATS ]
 GROUP_NAMES2 += [ r'\.(?P<videoApi>'  + vapi  + r')-(?P<releaseGroup>.*?)(-(.*?))?[ \.]'
                  for vapi in VAPIS ]
 GROUP_NAMES = [ re.compile(r, re.IGNORECASE) for r in GROUP_NAMES ]
 GROUP_NAMES2 = [ re.compile(r, re.IGNORECASE) for r in GROUP_NAMES2 ]
 def adjust_metadata(md):
    return dict((property_name, compute_canonical_form(property_name, value) or value)
                for property_name, value in md.items())
-class GuessReleaseGroup(Transformer):
+def guess_release_group(string):
-    def __init__(self):
+    # first try to see whether we have both a known codec and a known release group
-        Transformer.__init__(self, -190)
+    for rexp in GROUP_NAMES:
-        self.container = PropertiesContainer(canonical_from_pattern=False)
+        match = rexp.search(string)
-        self._allowed_groupname_pattern = '[\w@#€£$&]'
+        while match:
-        self._forbidden_groupname_lambda = [lambda elt: elt in ['rip', 'by', 'for', 'par', 'pour', 'bonus'],
+            metadata = match.groupdict()
-                               lambda elt: self._is_number(elt),
+            # make sure this is an actual release group we caught
-                               ]
+            release_group = (compute_canonical_form('releaseGroup', metadata['releaseGroup']) or
-        # If the previous property in this list, the match will be considered as safe
+                             compute_canonical_form('weakReleaseGroup', metadata['releaseGroup']))
-        # and group name can contain a separator.
+            if release_group:
-        self.previous_safe_properties = ['videoCodec', 'format', 'videoApi', 'audioCodec', 'audioProfile', 'videoProfile', 'audioChannels']
+                return adjust_metadata(metadata), (match.start(1), match.end(2))
-        self.container.sep_replace_char = '-'
+            # we didn't find anything conclusive, keep searching
-        self.container.canonical_from_pattern = False
+            match = rexp.search(string, match.span()[0]+1)
        self.container.enhance = True
        self.container.register_property('releaseGroup', self._allowed_groupname_pattern + '+')
        self.container.register_property('releaseGroup', self._allowed_groupname_pattern + '+-' + self._allowed_groupname_pattern + '+')
-    def supported_properties(self):
+    # pick anything as releaseGroup as long as we have a codec in front
-        return self.container.get_supported_properties()
+    # this doesn't include a potential dash ('-') ending the release group
    # eg: [...].X264-HiS@SiLUHD-English.[...]
    for rexp in GROUP_NAMES2:
        match = rexp.search(string)
        if match:
            return adjust_metadata(match.groupdict()), (match.start(1), match.end(2))
-    def _is_number(self, s):
+    return None, None
        try:
            int(s)
            return True
        except ValueError:
            return False
    def validate_group_name(self, guess):
        val = guess['releaseGroup']
        if len(val) >= 2:
-            if '-' in val:
+def process(mtree):
-                checked_val = ""
+    SingleNodeGuesser(guess_release_group, 0.8, log).process(mtree)
                for elt in val.split('-'):
                    forbidden = False
                    for forbidden_lambda in self._forbidden_groupname_lambda:
                        forbidden = forbidden_lambda(elt.lower())
                        if forbidden:
                            break
                    if not forbidden:
                        if checked_val:
                            checked_val += '-'
                        checked_val += elt
                    else:
                        break
                val = checked_val
                if not val:
                    return False
                guess['releaseGroup'] = val
            forbidden = False
            for forbidden_lambda in self._forbidden_groupname_lambda:
                forbidden = forbidden_lambda(val.lower())
                if forbidden:
                    break
            if not forbidden:
                return True
        return False
    def is_leaf_previous(self, leaf, node):
        if leaf.span[1] <= node.span[0]:
            for idx in range(leaf.span[1], node.span[0]):
                if not leaf.root.value[idx] in sep:
                    return False
            return True
        return False
    def guess_release_group(self, string, node=None, options=None):
        found = self.container.find_properties(string, node, 'releaseGroup')
        guess = self.container.as_guess(found, string, self.validate_group_name, sep_replacement='-')
        validated_guess = None
        if guess:
            explicit_group_node = node.group_node()
            if explicit_group_node:
                for leaf in explicit_group_node.leaves_containing(self.previous_safe_properties):
                    if self.is_leaf_previous(leaf, node):
                        if leaf.root.value[leaf.span[1]] == '-':
                            guess.metadata().confidence = 1
                        else:
                            guess.metadata().confidence = 0.7
                        validated_guess = guess
            if not validated_guess:
                # If previous group last leaf is identified as a safe property,
                # consider the raw value as a releaseGroup
                previous_group_node = node.previous_group_node()
                if previous_group_node:
                    for leaf in previous_group_node.leaves_containing(self.previous_safe_properties):
                        if self.is_leaf_previous(leaf, node):
                            guess = Guess({'releaseGroup': node.value}, confidence=1, input=node.value, span=(0, len(node.value)))
                            if self.validate_group_name(guess):
                                node.guess = guess
                                validated_guess = guess
            if validated_guess:
                # If following group nodes have only one unidentified leaf, it belongs to the release group
                next_group_node = node
                while True:
                    next_group_node = next_group_node.next_group_node()
                    if next_group_node:
                        leaves = next_group_node.leaves()
                        if len(leaves) == 1 and not leaves[0].guess:
                            validated_guess['releaseGroup'] = validated_guess['releaseGroup'] + leaves[0].value
                            leaves[0].guess = validated_guess
                        else:
                            break
                    else:
                        break
        if validated_guess:
            # Strip brackets
            validated_guess['releaseGroup'] = strip_brackets(validated_guess['releaseGroup'])
        return validated_guess
    def process(self, mtree, options=None):
        GuessFinder(self.guess_release_group, None, self.log, options).process_nodes(mtree.unidentified_leaves())
--- a/lib/guessit/transfo/guess_video_rexps.py
+++ b/lib/guessit/transfo/guess_video_rexps.py
@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@ -18,41 +18,33 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, \
+from __future__ import unicode_literals
-    unicode_literals
+from guessit import Guess
 from guessit.transfo import SingleNodeGuesser
 from guessit.patterns import video_rexps, sep
 import re
 import logging
-from guessit.patterns import _psep
+log = logging.getLogger(__name__)
 from guessit.containers import PropertiesContainer
 from guessit.plugins.transformers import Transformer
 from guessit.matcher import GuessFinder
 from guessit.patterns.numeral import parse_numeral
-class GuessVideoRexps(Transformer):
+def guess_video_rexps(string):
-    def __init__(self):
+    string = '-' + string + '-'
-        Transformer.__init__(self, 25)
+    for rexp, confidence, span_adjust in video_rexps:
        match = re.search(sep + rexp + sep, string, re.IGNORECASE)
        if match:
            metadata = match.groupdict()
            # is this the better place to put it? (maybe, as it is at least
            # the soonest that we can catch it)
            if metadata.get('cdNumberTotal', -1) is None:
                del metadata['cdNumberTotal']
            span = (match.start() + span_adjust[0],
                    match.end() + span_adjust[1] - 2)
            return (Guess(metadata, confidence=confidence, raw=string[span[0]:span[1]]),
                    span)
-        self.container = PropertiesContainer(canonical_from_pattern=False)
+    return None, None
        self.container.register_property(None, 'cd' + _psep + '(?P<cdNumber>[0-9])(?:' + _psep + 'of' + _psep + '(?P<cdNumberTotal>[0-9]))?', confidence=1.0, enhance=False, global_span=True, formatter=parse_numeral)
        self.container.register_property('cdNumberTotal', '([1-9])' + _psep + 'cds?', confidence=0.9, enhance=False, formatter=parse_numeral)
-        self.container.register_property('bonusNumber', 'x([0-9]{1,2})', enhance=False, global_span=True, formatter=parse_numeral)
+def process(mtree):
-
+    SingleNodeGuesser(guess_video_rexps, None, log).process(mtree)
        self.container.register_property('filmNumber', 'f([0-9]{1,2})', enhance=False, global_span=True, formatter=parse_numeral)
        self.container.register_property('edition', 'collector', 'collector-edition', 'edition-collector', canonical_form='Collector Edition')
        self.container.register_property('edition', 'special-edition', 'edition-special', canonical_form='Special Edition')
        self.container.register_property('edition', 'criterion', 'criterion-edition', 'edition-criterion', canonical_form='Criterion Edition')
        self.container.register_property('edition', 'deluxe', 'cdeluxe-edition', 'edition-deluxe', canonical_form='Deluxe Edition')
        self.container.register_property('edition', 'director\'?s?-cut', 'director\'?s?-cut-edition', 'edition-director\'?s?-cut', canonical_form='Director\'s cut')
    def supported_properties(self):
        return self.container.get_supported_properties()
    def guess_video_rexps(self, string, node=None, options=None):
        found = self.container.find_properties(string, node)
        return self.container.as_guess(found, string)
    def process(self, mtree, options=None):
        GuessFinder(self.guess_video_rexps, None, self.log, options).process_nodes(mtree.unidentified_leaves())
--- a/lib/guessit/transfo/guess_weak_episodes_rexps.py
+++ b/lib/guessit/transfo/guess_weak_episodes_rexps.py
@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@ -18,52 +18,45 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, unicode_literals
+from __future__ import unicode_literals
 from guessit import Guess
 from guessit.transfo import SingleNodeGuesser
 from guessit.patterns import weak_episode_rexps
 import re
 import logging
-from guessit.plugins.transformers import Transformer
+log = logging.getLogger(__name__)
 from guessit.matcher import GuessFinder
 from guessit.patterns import sep
 from guessit.containers import PropertiesContainer
 from guessit.patterns.numeral import numeral, parse_numeral
 from guessit.date import valid_year
-class GuessWeakEpisodesRexps(Transformer):
+def guess_weak_episodes_rexps(string, node):
-    def __init__(self):
+    if 'episodeNumber' in node.root.info:
-        Transformer.__init__(self, 15)
+        return None, None
-        self.properties = PropertiesContainer(enhance=False, canonical_from_pattern=False)
+    for rexp, span_adjust in weak_episode_rexps:
        match = re.search(rexp, string, re.IGNORECASE)
        if match:
            metadata = match.groupdict()
            span = (match.start() + span_adjust[0],
                    match.end() + span_adjust[1])
-        def _formater(episodeNumber):
+            epnum = int(metadata['episodeNumber'])
-            epnum = parse_numeral(episodeNumber)
+            if epnum > 100:
-            if not valid_year(epnum):
+                season, epnum = epnum // 100, epnum % 100
-                if epnum > 100:
+                # episodes which have a season > 25 are most likely errors
-                    season, epnum = epnum // 100, epnum % 100
+                # (Simpsons is at 23!)
-                    # episodes which have a season > 50 are most likely errors
+                if season > 25:
-                    # (Simpson is at 25!)
+                    continue
-                    if season > 50:
+                return Guess({ 'season': season,
-                        return None
+                               'episodeNumber': epnum },
-                    return {'season': season, 'episodeNumber': epnum}
+                             confidence=0.6, raw=string[span[0]:span[1]]), span
-                else:
+            else:
-                    return epnum
+                return Guess(metadata, confidence=0.3, raw=string[span[0]:span[1]]), span
-        self.properties.register_property(['episodeNumber', 'season'], '[0-9]{2,4}', confidence=0.6, formatter=_formater)
+    return None, None
        self.properties.register_property('episodeNumber', '(?:episode)' + sep + '(' + numeral + ')[^0-9]', confidence=0.3)
    def supported_properties(self):
        return self.properties.get_supported_properties()
-    def guess_weak_episodes_rexps(self, string, node=None, options=None):
+guess_weak_episodes_rexps.use_node = True
        if node and 'episodeNumber' in node.root.info:
            return None
        properties = self.properties.find_properties(string, node)
        guess = self.properties.as_guess(properties, string)
-        return guess
+def process(mtree):
-
+    SingleNodeGuesser(guess_weak_episodes_rexps, 0.6, log).process(mtree)
    def should_process(self, mtree, options=None):
        return mtree.guess.get('type', '').startswith('episode')
    def process(self, mtree, options=None):
        GuessFinder(self.guess_weak_episodes_rexps, 0.6, self.log, options).process_nodes(mtree.unidentified_leaves())
--- a/lib/guessit/transfo/guess_website.py
+++ b/lib/guessit/transfo/guess_website.py
@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
+# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@ -18,49 +18,22 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, \
+from __future__ import unicode_literals
-    unicode_literals
+from guessit.transfo import SingleNodeGuesser
 from guessit.patterns import websites
 import logging
-from guessit.patterns import build_or_pattern
+log = logging.getLogger(__name__)
 from guessit.containers import PropertiesContainer
 from guessit.plugins.transformers import Transformer
 from guessit.matcher import GuessFinder
 from pkg_resources import resource_stream  # @UnresolvedImport
-class GuessWebsite(Transformer):
+def guess_website(string):
-    def __init__(self):
+    low = string.lower()
-        Transformer.__init__(self, 45)
+    for site in websites:
        pos = low.find(site.lower())
        if pos != -1:
            return {'website': site}, (pos, pos + len(site))
    return None, None
        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)
-        tlds = []
+def process(mtree):
-
+    SingleNodeGuesser(guess_website, 1.0, log).process(mtree)
        f = resource_stream('guessit', 'tlds-alpha-by-domain.txt')
        f.readline()
        next(f)
        for tld in f:
            tld = tld.strip()
            if b'--' in tld:
                continue
            tlds.append(tld.decode("utf-8"))
        f.close()
        tlds_pattern = build_or_pattern(tlds)  # All registered domain extension
        safe_tlds_pattern = build_or_pattern(['com', 'org', 'net'])  # For sure a website extension
        safe_subdomains_pattern = build_or_pattern(['www'])  # For sure a website subdomain
        safe_prefix_tlds_pattern = build_or_pattern(['co', 'com', 'org', 'net'])  # Those words before a tlds are sure
        self.container.register_property('website', '(?:' + safe_subdomains_pattern + '\.)+' + r'(?:[a-z-]+\.)+' + r'(?:' + tlds_pattern + r')+')
        self.container.register_property('website', '(?:' + safe_subdomains_pattern + '\.)*' + r'[a-z-]+\.' + r'(?:' + safe_tlds_pattern + r')+')
        self.container.register_property('website', '(?:' + safe_subdomains_pattern + '\.)*' + r'[a-z-]+\.' + r'(?:' + safe_prefix_tlds_pattern + r'\.)+' + r'(?:' + tlds_pattern + r')+')
    def supported_properties(self):
        return self.container.get_supported_properties()
    def guess_website(self, string, node=None, options=None):
        found = self.container.find_properties(string, node, 'website')
        return self.container.as_guess(found, string)
    def process(self, mtree, options=None):
        GuessFinder(self.guess_website, 1.0, self.log, options).process_nodes(mtree.unidentified_leaves())
--- a/lib/guessit/transfo/guess_year.py
+++ b/lib/guessit/transfo/guess_year.py
@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@ -18,32 +18,33 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, unicode_literals
+from __future__ import unicode_literals
-
+from guessit.transfo import SingleNodeGuesser
 from guessit.plugins.transformers import Transformer
 from guessit.matcher import GuessFinder
 from guessit.date import search_year
 import logging
 log = logging.getLogger(__name__)
-class GuessYear(Transformer):
+def guess_year(string):
-    def __init__(self):
+    year, span = search_year(string)
-        Transformer.__init__(self, -160)
+    if year:
        return { 'year': year }, span
    else:
        return None, None
-    def supported_properties(self):
+def guess_year_skip_first(string):
-        return ['year']
+    year, span = search_year(string)
    if year:
        year2, span2 = guess_year(string[span[1]:])
        if year2:
            return year2, (span2[0]+span[1], span2[1]+span[1])
-    def guess_year(self, string, node=None, options=None):
+    return None, None
        year, span = search_year(string)
        if year:
            return {'year': year}, span
        else:
            return None, None
    def second_pass_options(self, mtree, options=None):
        """Build the options for a second pass when several leaves hold a year.
        :param mtree: the match tree queried through leaves_containing('year')
        :param options: accepted for interface compatibility; not used here
        :return: ``{'skip_nodes': [...]}`` listing every year leaf except the
            last one when more than one was found, otherwise ``None``
        """
        with_year = mtree.leaves_containing('year')
        count = len(with_year)
        if count <= 1:
            return None
        # Everything but the final year leaf is to be skipped.
        return {'skip_nodes': with_year[:count - 1]}
-    def process(self, mtree, options=None):
+def process(mtree, skip_first_year=False):
-        GuessFinder(self.guess_year, 1.0, self.log, options).process_nodes(mtree.unidentified_leaves())
+    if skip_first_year:
        SingleNodeGuesser(guess_year_skip_first, 1.0, log).process(mtree)
    else:
        SingleNodeGuesser(guess_year, 1.0, log).process(mtree)
--- a/lib/guessit/transfo/post_process.py
+++ b/lib/guessit/transfo/post_process.py
@ -0,0 +1,73 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
 # Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
 # GuessIt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # Lesser GNU General Public License for more details.
 #
 # You should have received a copy of the Lesser GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 from __future__ import unicode_literals
 from guessit.patterns import subtitle_exts
 from guessit.textutils import reorder_title, find_words
 import logging
 log = logging.getLogger(__name__)
 def process(mtree):
     """Run the final clean-up heuristics on a match tree, modifying it in place.
     Step 1 moves a node's 'language' guess to 'subtitleLanguage' (keeping its
     confidence) when one of three heuristics suggests it describes subtitles.
     Step 2 passes every 'series' guess through reorder_title().
     :param mtree: the match tree whose node guesses are updated
     """
     # 1- try to promote language to subtitle language where it makes sense
     for node in mtree.nodes():
         if 'language' not in node.guess:
             continue
         def promote_subtitle():
             # pylint: disable=W0631
             # More than one heuristic below can match the same node. After
             # the first promotion 'language' is gone, so a further call must
             # be a no-op instead of failing with KeyError.
             if 'language' not in node.guess:
                 return
             node.guess.set('subtitleLanguage', node.guess['language'],
                            confidence=node.guess.confidence('language'))
             del node.guess['language']
         # - if we matched a language in a file with a sub extension and that
         #   the group is the last group of the filename, it is probably the
         #   language of the subtitle
         #   (eg: 'xxx.english.srt')
         if (mtree.node_at((-1,)).value.lower() in subtitle_exts and
             node == mtree.leaves()[-2]):
             promote_subtitle()
         # - if we find the word 'sub' before the language, and in the same explicit
         #   group, then upgrade the language
         explicit_group = mtree.node_at(node.node_idx[:2])
         group_str = explicit_group.value.lower()
         if ('sub' in find_words(group_str) and
             0 <= group_str.find('sub') < (node.span[0] - explicit_group.span[0])):
             promote_subtitle()
         # - if a language is in an explicit group just preceded by "st",
         #   it is a subtitle language (eg: '...st[fr-eng]...')
         try:
             idx = node.node_idx
             previous = mtree.node_at((idx[0], idx[1] - 1)).leaves()[-1]
             if previous.value.lower()[-2:] == 'st':
                 promote_subtitle()
         except IndexError:
             # no usable preceding group/leaf: this heuristic does not apply
             pass
     # 2- ", the" at the end of a series title should be prepended to it
     for node in mtree.nodes():
         if 'series' not in node.guess:
             continue
         node.guess['series'] = reorder_title(node.guess['series'])
--- a/lib/guessit/transfo/split_explicit_groups.py
+++ b/lib/guessit/transfo/split_explicit_groups.py
@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@ -18,32 +18,27 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, unicode_literals
+from __future__ import unicode_literals
 from guessit.plugins.transformers import Transformer
 from guessit.textutils import find_first_level_groups
 from guessit.patterns import group_delimiters
-from functools import reduce
+import functools
 import logging
 log = logging.getLogger(__name__)
-class SplitExplicitGroups(Transformer):
+def process(mtree):
-    def __init__(self):
+    """return the string split into explicit groups, that is, those either
-        Transformer.__init__(self, 245)
+    between parentheses, square brackets or curly braces, and those separated
    by a dash."""
    for c in mtree.children:
        groups = find_first_level_groups(c.value, group_delimiters[0])
        for delimiters in group_delimiters:
            flatten = lambda l, x: l + find_first_level_groups(x, delimiters)
            groups = functools.reduce(flatten, groups, [])
-    def process(self, mtree, options=None):
+        # do not do this at this moment, it is not strong enough and can break other
-        """split each of those into explicit groups (separated by parentheses or square brackets)
+        # patterns, such as dates, etc...
        #groups = functools.reduce(lambda l, x: l + x.split('-'), groups, [])
-        :return: return the string split into explicit groups, that is, those either
+        c.split_on_components(groups)
        between parentheses, square brackets or curly braces, and those separated
        by a dash."""
        for c in mtree.children:
            groups = find_first_level_groups(c.value, group_delimiters[0])
            for delimiters in group_delimiters:
                flatten = lambda l, x: l + find_first_level_groups(x, delimiters)
                groups = reduce(flatten, groups, [])
            # do not do this at this moment, it is not strong enough and can break other
            # patterns, such as dates, etc...
            # groups = functools.reduce(lambda l, x: l + x.split('-'), groups, [])
            c.split_on_components(groups)
--- a/lib/guessit/transfo/split_on_dash.py
+++ b/lib/guessit/transfo/split_on_dash.py
@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@ -18,30 +18,25 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, unicode_literals
+from __future__ import unicode_literals
 from guessit.plugins.transformers import Transformer
 from guessit.patterns import sep
 import re
 import logging
 log = logging.getLogger(__name__)
-class SplitOnDash(Transformer):
+def process(mtree):
-    def __init__(self):
+    for node in mtree.unidentified_leaves():
-        Transformer.__init__(self, 190)
+        indices = []
-    def process(self, mtree, options=None):
+        didx = 0
-        """split into '-' separated subgroups (with required separator chars
+        pattern = re.compile(sep + '-' + sep)
-        around the dash)
+        match = pattern.search(node.value)
-        """
+        while match:
-        for node in mtree.unidentified_leaves():
+            span = match.span()
-            indices = []
+            indices.extend([ span[0], span[1] ])
            match = pattern.search(node.value, span[1])
-            pattern = re.compile(sep + '-' + sep)
+        if indices:
-            match = pattern.search(node.value)
+            node.partition(indices)
            while match:
                span = match.span()
                indices.extend([span[0], span[1]])
                match = pattern.search(node.value, span[1])
            if indices:
                node.partition(indices)
--- a/lib/guessit/transfo/split_path_components.py
+++ b/lib/guessit/transfo/split_path_components.py
@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@ -18,28 +18,19 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import absolute_import, division, print_function, unicode_literals
+from __future__ import unicode_literals
 from guessit.plugins.transformers import Transformer
 from guessit import fileutils
-from os.path import splitext
+import os.path
 import logging
 log = logging.getLogger(__name__)
-class SplitPathComponents(Transformer):
+def process(mtree):
-    def __init__(self):
+    """Returns the filename split into [ dir*, basename, ext ]."""
-        Transformer.__init__(self, 255)
+    components = fileutils.split_path(mtree.value)
    basename = components.pop(-1)
    components += list(os.path.splitext(basename))
    components[-1] = components[-1][1:] # remove the '.' from the extension
-    def process(self, mtree, options=None):
+    mtree.split_on_components(components)
        """first split our path into dirs + basename + ext
        :return: the filename split into [ dir*, basename, ext ]
        """
        if not options.get('name_only'):
            components = fileutils.split_path(mtree.value)
            basename = components.pop(-1)
            components += list(splitext(basename))
            components[-1] = components[-1][1:]  # remove the '.' from the extension
            mtree.split_on_components(components)
        else:
            mtree.split_on_components([mtree.value, ''])
		`@ -1 +0,0 @@`
			`Just a dummy srt file (used for unittests: do not remove!)`