mirror of
https://github.com/clinton-hall/nzbToMedia.git
synced 2025-07-16 02:02:53 -07:00
Added GuessIt library and required libs for it.
We now run GuessIt on the nzbName to extract the movie title and year instead of using a regex, which is more accurate.
This commit is contained in:
parent
d26cc388d1
commit
c1a1354636
69 changed files with 9263 additions and 38 deletions
18
lib/babelfish/__init__.py
Normal file
18
lib/babelfish/__init__.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
# Package metadata, exposed as module-level dunder attributes.
__title__ = 'babelfish'
__version__ = '0.5.1'
__author__ = 'Antoine Bertin'
__license__ = 'BSD'
__copyright__ = 'Copyright 2013 the BabelFish authors'
|
||||
|
||||
from .converters import (LanguageConverter, LanguageReverseConverter, LanguageEquivalenceConverter, CountryConverter,
|
||||
CountryReverseConverter)
|
||||
from .country import country_converters, COUNTRIES, COUNTRY_MATRIX, Country
|
||||
from .exceptions import Error, LanguageConvertError, LanguageReverseError, CountryConvertError, CountryReverseError
|
||||
from .language import language_converters, LANGUAGES, LANGUAGE_MATRIX, Language
|
||||
from .script import SCRIPTS, SCRIPT_MATRIX, Script
|
280
lib/babelfish/converters/__init__.py
Normal file
280
lib/babelfish/converters/__init__.py
Normal file
|
@ -0,0 +1,280 @@
|
|||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
import collections
|
||||
from pkg_resources import iter_entry_points, EntryPoint
|
||||
from ..exceptions import LanguageConvertError, LanguageReverseError
|
||||
|
||||
|
||||
try:
    # Python 3.3+: the ABCs live in ``collections.abc``; the aliases that used
    # to be reachable as ``collections.Mapping`` were removed in Python 3.10.
    from collections.abc import Mapping as _Mapping, MutableMapping as _MutableMapping
except ImportError:  # Python 2
    from collections import Mapping as _Mapping, MutableMapping as _MutableMapping


# from https://github.com/kennethreitz/requests/blob/master/requests/structures.py
class CaseInsensitiveDict(_MutableMapping):
    """A case-insensitive ``dict``-like object.

    Implements the ``MutableMapping`` interface as well as dict's ``copy``.
    Also provides ``lower_items``.

    All keys are expected to be strings. The structure remembers the
    case of the last key to be set, and iterating over the instance yields
    those case-sensitive keys. However, querying and contains
    testing is case insensitive:

        cid = CaseInsensitiveDict()
        cid['English'] = 'eng'
        cid['ENGLISH'] == 'eng'  # True
        list(cid) == ['English']  # True

    If the constructor, ``.update``, or equality comparison
    operations are given keys that have equal ``.lower()``s, the
    behavior is undefined.

    """
    def __init__(self, data=None, **kwargs):
        # Maps lowercased key -> (key as last set, value).
        self._store = dict()
        if data is None:
            data = {}
        self.update(data, **kwargs)

    def __setitem__(self, key, value):
        # Use the lowercased key for lookups, but store the actual
        # key alongside the value.
        self._store[key.lower()] = (key, value)

    def __getitem__(self, key):
        return self._store[key.lower()][1]

    def __delitem__(self, key):
        del self._store[key.lower()]

    def __iter__(self):
        return (casedkey for casedkey, mappedvalue in self._store.values())

    def __len__(self):
        return len(self._store)

    def lower_items(self):
        """Like iteritems(), but with all lowercase keys."""
        return (
            (lowerkey, keyval[1])
            for (lowerkey, keyval)
            in self._store.items()
        )

    def __eq__(self, other):
        if isinstance(other, _Mapping):
            other = CaseInsensitiveDict(other)
        else:
            return NotImplemented
        # Compare insensitively
        return dict(self.lower_items()) == dict(other.lower_items())

    # Copy is required
    def copy(self):
        # The stored values are (cased key, value) pairs, which ``update``
        # accepts as an iterable of items.
        return CaseInsensitiveDict(self._store.values())

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, dict(self.items()))
|
||||
|
||||
|
||||
class LanguageConverter(object):
    """Base class for converters that turn an alpha3 language code, optionally
    combined with an alpha2 country code and a script code, into a custom code

    .. attribute:: codes

        Set of possible custom codes

    """
    def convert(self, alpha3, country=None, script=None):
        """Produce the custom code for a language

        Subclasses must override this method; the base implementation always
        raises :class:`NotImplementedError`.

        :param string alpha3: ISO-639-3 language code
        :param country: ISO-3166 country code, if any
        :type country: string or None
        :param script: ISO-15924 script code, if any
        :type script: string or None
        :return: the corresponding custom code
        :rtype: string
        :raise: :class:`~babelfish.exceptions.LanguageConvertError`

        """
        raise NotImplementedError()
|
||||
|
||||
|
||||
class LanguageReverseConverter(LanguageConverter):
    """A :class:`LanguageConverter` that can also map a custom code back to an
    alpha3 ISO-639-3 language code, alpha2 ISO-3166-1 country code and
    ISO-15924 script code

    """
    def reverse(self, code):
        """Map a custom code back to alpha3, country and script code

        Subclasses must override this method; the base implementation always
        raises :class:`NotImplementedError`.

        :param string code: custom code to reverse
        :return: the corresponding alpha3 ISO-639-3 language code, alpha2 ISO-3166-1 country code and ISO-15924 script code
        :rtype: tuple
        :raise: :class:`~babelfish.exceptions.LanguageReverseError`

        """
        raise NotImplementedError()
|
||||
|
||||
|
||||
class LanguageEquivalenceConverter(LanguageReverseConverter):
    """Helper for writing a :class:`LanguageReverseConverter` from nothing more
    than a dict that maps alpha3 codes to their custom symbols.

    Subclasses provide that dict as a class variable named SYMBOLS.

    The reverse conversion is case-insensitive by default; set the class
    variable CASE_SENSITIVE to ``True`` to make it case-sensitive.

    Example::

        class MyCodeConverter(babelfish.LanguageEquivalenceConverter):
            CASE_SENSITIVE = True
            SYMBOLS = {'fra': 'mycode1', 'eng': 'mycode2'}

    """
    CASE_SENSITIVE = False

    def __init__(self):
        # The reverse table is a plain dict when lookups must be case-sensitive.
        reverse_table = {} if self.CASE_SENSITIVE else CaseInsensitiveDict()
        forward_table = {}
        known_codes = set()

        for language_code, custom_code in self.SYMBOLS.items():
            forward_table[language_code] = custom_code
            # Equivalence converters carry no country or script information.
            reverse_table[custom_code] = (language_code, None, None)
            known_codes.add(custom_code)

        self.codes = known_codes
        self.to_symbol = forward_table
        self.from_symbol = reverse_table

    def convert(self, alpha3, country=None, script=None):
        """Return the symbol registered for `alpha3`

        :raise: :class:`~babelfish.exceptions.LanguageConvertError` if `alpha3` has no symbol

        """
        try:
            symbol = self.to_symbol[alpha3]
        except KeyError:
            raise LanguageConvertError(alpha3, country, script)
        return symbol

    def reverse(self, code):
        """Return the ``(alpha3, None, None)`` tuple registered for `code`

        :raise: :class:`~babelfish.exceptions.LanguageReverseError` if `code` is unknown

        """
        try:
            result = self.from_symbol[code]
        except KeyError:
            raise LanguageReverseError(code)
        return result
|
||||
|
||||
|
||||
class CountryConverter(object):
    """Base class for converters that turn an alpha2 country code into a
    custom code

    .. attribute:: codes

        Set of possible custom codes

    """
    def convert(self, alpha2):
        """Produce the custom code for a country

        Subclasses must override this method; the base implementation always
        raises :class:`NotImplementedError`.

        :param string alpha2: ISO-3166-1 country code
        :return: the corresponding custom code
        :rtype: string
        :raise: :class:`~babelfish.exceptions.CountryConvertError`

        """
        raise NotImplementedError()
|
||||
|
||||
|
||||
class CountryReverseConverter(CountryConverter):
    """A :class:`CountryConverter` that can also map a custom code back to an
    alpha2 ISO-3166-1 country code

    """
    def reverse(self, code):
        """Map a custom code back to an alpha2 code

        Subclasses must override this method; the base implementation always
        raises :class:`NotImplementedError`.

        :param string code: custom code to reverse
        :return: the corresponding alpha2 ISO-3166-1 country code
        :rtype: string
        :raise: :class:`~babelfish.exceptions.CountryReverseError`

        """
        raise NotImplementedError()
|
||||
|
||||
|
||||
class ConverterManager(object):
    """Manager for babelfish converters behaving like a dict with lazy loading

    Loading is done in this order:

    * Entry point converters
    * Registered converters
    * Internal converters

    .. attribute:: entry_point

        The entry point where to look for converters

    .. attribute:: internal_converters

        Internal converters with entry point syntax

    """
    entry_point = ''
    internal_converters = []

    def __init__(self):
        #: Registered converters with entry point syntax
        self.registered_converters = []

        #: Loaded converters
        self.converters = {}

    @staticmethod
    def _resolve(ep):
        """Import and return the object an entry point refers to, without
        checking distribution requirements"""
        # The ``require`` argument of ``EntryPoint.load`` is deprecated in
        # newer setuptools; ``resolve()`` is its replacement where available.
        if hasattr(ep, 'resolve'):
            return ep.resolve()
        return ep.load(require=False)

    def __getitem__(self, name):
        """Get a converter, lazy loading it if necessary

        :param string name: name of the converter
        :return: the converter instance
        :raise: KeyError if no converter with that name can be found

        """
        if name in self.converters:
            return self.converters[name]
        # Converters advertised by installed distributions come first.
        for ep in iter_entry_points(self.entry_point):
            if ep.name == name:
                self.converters[ep.name] = ep.load()()
                return self.converters[ep.name]
        # Then the registered converters, and finally the internal ones.
        for ep in (EntryPoint.parse(c) for c in self.registered_converters + self.internal_converters):
            if ep.name == name:
                self.converters[ep.name] = self._resolve(ep)()
                return self.converters[ep.name]
        raise KeyError(name)

    def __setitem__(self, name, converter):
        """Load a converter"""
        self.converters[name] = converter

    def __delitem__(self, name):
        """Unload a converter"""
        del self.converters[name]

    def __iter__(self):
        """Iterator over loaded converters"""
        return iter(self.converters)

    def register(self, entry_point):
        """Register a converter

        :param string entry_point: converter to register (entry point syntax)
        :raise: ValueError if already registered

        """
        if entry_point in self.registered_converters:
            raise ValueError('Already registered')
        # Inserted at the front, so the newest registration is tried first.
        self.registered_converters.insert(0, entry_point)

    def unregister(self, entry_point):
        """Unregister a converter

        :param string entry_point: converter to unregister (entry point syntax)

        """
        self.registered_converters.remove(entry_point)

    def __contains__(self, name):
        # Only reports converters that are already loaded.
        return name in self.converters
|
17
lib/babelfish/converters/alpha2.py
Normal file
17
lib/babelfish/converters/alpha2.py
Normal file
|
@ -0,0 +1,17 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import LanguageEquivalenceConverter
|
||||
from ..language import LANGUAGE_MATRIX
|
||||
|
||||
|
||||
class Alpha2Converter(LanguageEquivalenceConverter):
    """Equivalence converter between alpha3 codes and the ``alpha2`` column of
    ``LANGUAGE_MATRIX``; reverse lookups are case-sensitive"""
    CASE_SENSITIVE = True
    SYMBOLS = {}
    # Rows with an empty alpha2 value get no entry.
    for iso_language in LANGUAGE_MATRIX:
        if not iso_language.alpha2:
            continue
        SYMBOLS[iso_language.alpha3] = iso_language.alpha2
|
17
lib/babelfish/converters/alpha3b.py
Normal file
17
lib/babelfish/converters/alpha3b.py
Normal file
|
@ -0,0 +1,17 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import LanguageEquivalenceConverter
|
||||
from ..language import LANGUAGE_MATRIX
|
||||
|
||||
|
||||
class Alpha3BConverter(LanguageEquivalenceConverter):
    """Equivalence converter between alpha3 codes and the ``alpha3b`` column of
    ``LANGUAGE_MATRIX``; reverse lookups are case-sensitive"""
    CASE_SENSITIVE = True
    SYMBOLS = {}
    # Rows with an empty alpha3b value get no entry.
    for iso_language in LANGUAGE_MATRIX:
        if not iso_language.alpha3b:
            continue
        SYMBOLS[iso_language.alpha3] = iso_language.alpha3b
|
17
lib/babelfish/converters/alpha3t.py
Normal file
17
lib/babelfish/converters/alpha3t.py
Normal file
|
@ -0,0 +1,17 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import LanguageEquivalenceConverter
|
||||
from ..language import LANGUAGE_MATRIX
|
||||
|
||||
|
||||
class Alpha3TConverter(LanguageEquivalenceConverter):
    """Equivalence converter between alpha3 codes and the ``alpha3t`` column of
    ``LANGUAGE_MATRIX``; reverse lookups are case-sensitive"""
    CASE_SENSITIVE = True
    SYMBOLS = {}
    # Rows with an empty alpha3t value get no entry.
    for iso_language in LANGUAGE_MATRIX:
        if not iso_language.alpha3t:
            continue
        SYMBOLS[iso_language.alpha3] = iso_language.alpha3t
|
31
lib/babelfish/converters/countryname.py
Normal file
31
lib/babelfish/converters/countryname.py
Normal file
|
@ -0,0 +1,31 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import CountryReverseConverter, CaseInsensitiveDict
|
||||
from ..country import COUNTRY_MATRIX
|
||||
from ..exceptions import CountryConvertError, CountryReverseError
|
||||
|
||||
|
||||
class CountryNameConverter(CountryReverseConverter):
    """Converter between alpha2 country codes and the country names listed in
    ``COUNTRY_MATRIX``; name lookups are case-insensitive"""
    def __init__(self):
        names = set()
        alpha2_to_name = {}
        name_to_alpha2 = CaseInsensitiveDict()
        for entry in COUNTRY_MATRIX:
            names.add(entry.name)
            alpha2_to_name[entry.alpha2] = entry.name
            name_to_alpha2[entry.name] = entry.alpha2
        self.codes = names
        self.to_name = alpha2_to_name
        self.from_name = name_to_alpha2

    def convert(self, alpha2):
        """Return the country name for `alpha2`

        :raise: :class:`~babelfish.exceptions.CountryConvertError` if `alpha2` is unknown

        """
        if alpha2 in self.to_name:
            return self.to_name[alpha2]
        raise CountryConvertError(alpha2)

    def reverse(self, name):
        """Return the alpha2 code for the country called `name`

        :raise: :class:`~babelfish.exceptions.CountryReverseError` if `name` is unknown

        """
        if name in self.from_name:
            return self.from_name[name]
        raise CountryReverseError(name)
|
17
lib/babelfish/converters/name.py
Normal file
17
lib/babelfish/converters/name.py
Normal file
|
@ -0,0 +1,17 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import LanguageEquivalenceConverter
|
||||
from ..language import LANGUAGE_MATRIX
|
||||
|
||||
|
||||
class NameConverter(LanguageEquivalenceConverter):
    """Equivalence converter between alpha3 codes and the ``name`` column of
    ``LANGUAGE_MATRIX``; reverse lookups are case-insensitive"""
    CASE_SENSITIVE = False
    SYMBOLS = {}
    # Rows with an empty name get no entry.
    for iso_language in LANGUAGE_MATRIX:
        if not iso_language.name:
            continue
        SYMBOLS[iso_language.alpha3] = iso_language.name
|
36
lib/babelfish/converters/opensubtitles.py
Normal file
36
lib/babelfish/converters/opensubtitles.py
Normal file
|
@ -0,0 +1,36 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import LanguageReverseConverter, CaseInsensitiveDict
|
||||
from ..exceptions import LanguageReverseError
|
||||
from ..language import language_converters
|
||||
|
||||
|
||||
class OpenSubtitlesConverter(LanguageReverseConverter):
    """Converter for OpenSubtitles language codes

    Built on the ``alpha3b`` and ``alpha2`` converters, with a small table of
    exceptions applied on top.

    """
    def __init__(self):
        self.alpha3b_converter = language_converters['alpha3b']
        self.alpha2_converter = language_converters['alpha2']
        # Exceptions keyed by (alpha3b code, country) for the forward direction.
        self.to_opensubtitles = {('por', 'BR'): 'pob', ('gre', None): 'ell', ('srp', None): 'scc', ('srp', 'ME'): 'mne'}
        # Exceptions for the reverse direction, looked up case-insensitively.
        self.from_opensubtitles = CaseInsensitiveDict({'pob': ('por', 'BR'), 'pb': ('por', 'BR'), 'ell': ('ell', None),
                                                       'scc': ('srp', None), 'mne': ('srp', 'ME')})
        # Every special code that reverse() accepts is advertised, including
        # 'ell', which convert() can return.
        self.codes = (self.alpha2_converter.codes | self.alpha3b_converter.codes | set(self.from_opensubtitles))

    def convert(self, alpha3, country=None, script=None):
        """Return the exception-table code for the language, else its alpha3b code

        Any error raised by the ``alpha3b`` converter propagates unchanged.

        """
        alpha3b = self.alpha3b_converter.convert(alpha3, country, script)
        if (alpha3b, country) in self.to_opensubtitles:
            return self.to_opensubtitles[(alpha3b, country)]
        return alpha3b

    def reverse(self, opensubtitles):
        """Reverse `opensubtitles`, trying the exception table, then alpha3b, then alpha2

        :raise: :class:`~babelfish.exceptions.LanguageReverseError` if nothing matches

        """
        if opensubtitles in self.from_opensubtitles:
            return self.from_opensubtitles[opensubtitles]
        for conv in [self.alpha3b_converter, self.alpha2_converter]:
            try:
                return conv.reverse(opensubtitles)
            except LanguageReverseError:
                # Not known to this converter; try the next one.
                pass
        raise LanguageReverseError(opensubtitles)
|
23
lib/babelfish/converters/scope.py
Normal file
23
lib/babelfish/converters/scope.py
Normal file
|
@ -0,0 +1,23 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import LanguageConverter
|
||||
from ..exceptions import LanguageConvertError
|
||||
from ..language import LANGUAGE_MATRIX
|
||||
|
||||
|
||||
class ScopeConverter(LanguageConverter):
    """Converter from an alpha3 code to the full name of its ``scope`` value
    in ``LANGUAGE_MATRIX``"""
    #: Scope letter to full name mapping
    FULLNAME = {'I': 'individual', 'M': 'macrolanguage', 'S': 'special'}
    SYMBOLS = {}
    for iso_language in LANGUAGE_MATRIX:
        SYMBOLS[iso_language.alpha3] = iso_language.scope
    codes = set(SYMBOLS.values())

    def convert(self, alpha3, country=None, script=None):
        """Return the full scope name for `alpha3`

        :raise: :class:`~babelfish.exceptions.LanguageConvertError` if `alpha3` is unknown or its
            scope letter has no full name

        """
        # ``get`` keeps an unknown alpha3 from escaping as a bare KeyError.
        fullname = self.FULLNAME.get(self.SYMBOLS.get(alpha3))
        if fullname is None:
            raise LanguageConvertError(alpha3, country, script)
        return fullname
|
23
lib/babelfish/converters/type.py
Normal file
23
lib/babelfish/converters/type.py
Normal file
|
@ -0,0 +1,23 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import LanguageConverter
|
||||
from ..exceptions import LanguageConvertError
|
||||
from ..language import LANGUAGE_MATRIX
|
||||
|
||||
|
||||
class LanguageTypeConverter(LanguageConverter):
    """Converter from an alpha3 code to the full name of its ``type`` value
    in ``LANGUAGE_MATRIX``"""
    #: Type letter to full name mapping
    FULLNAME = {'A': 'ancient', 'C': 'constructed', 'E': 'extinct', 'H': 'historical', 'L': 'living', 'S': 'special'}
    SYMBOLS = {}
    for iso_language in LANGUAGE_MATRIX:
        SYMBOLS[iso_language.alpha3] = iso_language.type
    codes = set(SYMBOLS.values())

    def convert(self, alpha3, country=None, script=None):
        """Return the full type name for `alpha3`

        :raise: :class:`~babelfish.exceptions.LanguageConvertError` if `alpha3` is unknown or its
            type letter has no full name

        """
        # ``get`` keeps an unknown alpha3 from escaping as a bare KeyError.
        fullname = self.FULLNAME.get(self.SYMBOLS.get(alpha3))
        if fullname is None:
            raise LanguageConvertError(alpha3, country, script)
        return fullname
|
95
lib/babelfish/country.py
Normal file
95
lib/babelfish/country.py
Normal file
|
@ -0,0 +1,95 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from collections import namedtuple
|
||||
from functools import partial
|
||||
from pkg_resources import resource_stream # @UnresolvedImport
|
||||
from .converters import ConverterManager
|
||||
|
||||
|
||||
#: Country alpha2 code to country name mapping
COUNTRIES = {}

#: List of countries as :class:`IsoCountry` namedtuples
COUNTRY_MATRIX = []

#: The namedtuple used in the :data:`COUNTRY_MATRIX`
IsoCountry = namedtuple('IsoCountry', ['name', 'alpha2'])

f = resource_stream('babelfish', 'data/iso-3166-1.txt')
try:
    f.readline()  # the first line is discarded (presumably a header row)
    for l in f:
        if not l.strip():
            # Tolerate blank lines instead of failing on a field-count mismatch.
            continue
        iso_country = IsoCountry(*l.decode('utf-8').strip().split(';'))
        COUNTRIES[iso_country.alpha2] = iso_country.name
        COUNTRY_MATRIX.append(iso_country)
finally:
    # Release the stream even when a line cannot be parsed.
    f.close()
|
||||
|
||||
|
||||
class CountryConverterManager(ConverterManager):
    """The :class:`~babelfish.converters.ConverterManager` specialised for country converters"""
    entry_point = 'babelfish.country_converters'
    internal_converters = [
        'name = babelfish.converters.countryname:CountryNameConverter',
    ]


#: Module-level manager instance used by :class:`Country`
country_converters = CountryConverterManager()
|
||||
|
||||
|
||||
class CountryMeta(type):
    """The :class:`Country` metaclass

    Dynamically redirect :meth:`Country.frommycode` to :meth:`Country.fromcode` with the ``mycode`` `converter`

    """
    def __getattr__(cls, name):
        """Resolve ``from<converter>`` class attributes

        Only called once normal attribute lookup on the class has failed.

        :param string name: name of the missing class attribute
        :return: :meth:`fromcode` bound to the converter named after the ``from`` prefix
        :raise: AttributeError for any other missing attribute

        """
        if name.startswith('from'):
            return partial(cls.fromcode, converter=name[4:])
        # Calling getattr(cls, name) here would re-enter this method and
        # recurse without end, so report the attribute as missing instead.
        raise AttributeError('type object %r has no attribute %r' % (cls.__name__, name))
|
||||
|
||||
|
||||
class Country(CountryMeta(str('CountryBase'), (object,), {})):
    """A country on Earth

    A country is represented by a 2-letter code from the ISO-3166 standard

    :param string country: 2-letter ISO-3166 country code
    :raise: ValueError if `country` is not a key of ``COUNTRIES``

    """
    def __init__(self, country):
        if country not in COUNTRIES:
            raise ValueError('%r is not a valid country' % country)

        #: ISO-3166 2-letter country code
        self.alpha2 = country

    @classmethod
    def fromcode(cls, code, converter):
        """Create a :class:`Country` by its `code` using `converter` to
        :meth:`~babelfish.converters.CountryReverseConverter.reverse` it

        :param string code: the code to reverse
        :param string converter: name of the :class:`~babelfish.converters.CountryReverseConverter` to use
        :return: the corresponding :class:`Country` instance
        :rtype: :class:`Country`

        """
        return cls(country_converters[converter].reverse(code))

    def __getattr__(self, name):
        """Convert this country with the converter called `name`

        Only called once normal attribute lookup has failed.

        :raise: AttributeError if there is no converter called `name`

        """
        try:
            converter = country_converters[name]
        except KeyError:
            # A missing attribute must surface as AttributeError so that
            # hasattr(), copy and pickle behave normally.
            raise AttributeError(name)
        return converter.convert(self.alpha2)

    def __hash__(self):
        return hash(self.alpha2)

    def __eq__(self, other):
        # None and any other non-Country operand compare unequal instead of
        # raising on a missing ``alpha2`` attribute.
        if not isinstance(other, Country):
            return False
        return self.alpha2 == other.alpha2

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        return '<Country [%s]>' % self

    def __str__(self):
        return self.alpha2
|
85
lib/babelfish/exceptions.py
Normal file
85
lib/babelfish/exceptions.py
Normal file
|
@ -0,0 +1,85 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
class Error(Exception):
    """Root of the babelfish exception hierarchy"""
|
||||
|
||||
|
||||
class LanguageError(Error, AttributeError):
    """Root of the language-related exceptions in babelfish

    Also an :class:`AttributeError`.

    """
|
||||
|
||||
|
||||
class LanguageConvertError(LanguageError):
    """Raised by converters when :meth:`~babelfish.converters.LanguageConverter.convert` fails

    :param string alpha3: alpha3 code that failed conversion
    :param country: country code that failed conversion, if any
    :type country: string or None
    :param script: script code that failed conversion, if any
    :type script: string or None

    """
    def __init__(self, alpha3, country=None, script=None):
        self.alpha3, self.country, self.script = alpha3, country, script

    def __str__(self):
        # alpha3, followed by '-country' and '-script' for the parts that are set.
        text = self.alpha3
        for part in (self.country, self.script):
            if part is not None:
                text += '-' + part
        return text
|
||||
|
||||
|
||||
class LanguageReverseError(LanguageError):
    """Raised by converters when :meth:`~babelfish.converters.LanguageReverseConverter.reverse` fails

    :param string code: code that failed reverse conversion

    """
    def __init__(self, code):
        self.code = code

    def __str__(self):
        # The repr keeps the quotes, so an empty code stays visible.
        failed_code = self.code
        return repr(failed_code)
|
||||
|
||||
|
||||
class CountryError(Error, AttributeError):
    """Root of the country-related exceptions in babelfish

    Also an :class:`AttributeError`.

    """
|
||||
|
||||
|
||||
class CountryConvertError(CountryError):
    """Raised by converters when :meth:`~babelfish.converters.CountryConverter.convert` fails

    :param string alpha2: alpha2 code that failed conversion

    """
    def __init__(self, alpha2):
        self.alpha2 = alpha2

    def __str__(self):
        # The message is the failing code itself.
        failed_code = self.alpha2
        return failed_code
|
||||
|
||||
|
||||
class CountryReverseError(CountryError):
    """Raised by converters when :meth:`~babelfish.converters.CountryReverseConverter.reverse` fails

    :param string code: code that failed reverse conversion

    """
    def __init__(self, code):
        self.code = code

    def __str__(self):
        # The repr keeps the quotes, so an empty code stays visible.
        failed_code = self.code
        return repr(failed_code)
|
174
lib/babelfish/language.py
Normal file
174
lib/babelfish/language.py
Normal file
|
@ -0,0 +1,174 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from collections import namedtuple
|
||||
from functools import partial
|
||||
from pkg_resources import resource_stream # @UnresolvedImport
|
||||
from .converters import ConverterManager
|
||||
from .country import Country
|
||||
from .exceptions import LanguageConvertError
|
||||
from .script import Script
|
||||
|
||||
|
||||
#: Set of known alpha3 language codes
LANGUAGES = set()

#: List of languages as :class:`IsoLanguage` namedtuples
LANGUAGE_MATRIX = []

#: The namedtuple used in the :data:`LANGUAGE_MATRIX`
IsoLanguage = namedtuple('IsoLanguage', ['alpha3', 'alpha3b', 'alpha3t', 'alpha2', 'scope', 'type', 'name', 'comment'])

f = resource_stream('babelfish', 'data/iso-639-3.tab')
try:
    f.readline()  # the first line is discarded (presumably a header row)
    for l in f:
        if not l.strip():
            # Tolerate blank lines instead of failing on a field-count mismatch.
            continue
        # Drop only the line terminator: a full strip() would also eat trailing
        # tabs that delimit empty fields, while keeping the terminator would
        # leave it inside the last field.
        iso_language = IsoLanguage(*l.decode('utf-8').rstrip('\r\n').split('\t'))
        LANGUAGES.add(iso_language.alpha3)
        LANGUAGE_MATRIX.append(iso_language)
finally:
    # Release the stream even when a line cannot be parsed.
    f.close()
|
||||
|
||||
|
||||
class LanguageConverterManager(ConverterManager):
    """The :class:`~babelfish.converters.ConverterManager` specialised for language converters"""
    entry_point = 'babelfish.language_converters'
    internal_converters = [
        'alpha2 = babelfish.converters.alpha2:Alpha2Converter',
        'alpha3b = babelfish.converters.alpha3b:Alpha3BConverter',
        'alpha3t = babelfish.converters.alpha3t:Alpha3TConverter',
        'name = babelfish.converters.name:NameConverter',
        'scope = babelfish.converters.scope:ScopeConverter',
        'type = babelfish.converters.type:LanguageTypeConverter',
        'opensubtitles = babelfish.converters.opensubtitles:OpenSubtitlesConverter',
    ]


#: Module-level manager instance used by :class:`Language`
language_converters = LanguageConverterManager()
|
||||
|
||||
|
||||
class LanguageMeta(type):
    """The :class:`Language` metaclass

    Dynamically redirect :meth:`Language.frommycode` to :meth:`Language.fromcode` with the ``mycode`` `converter`

    """
    def __getattr__(cls, name):
        """Resolve ``from<converter>`` class attributes

        Only called once normal attribute lookup on the class has failed.

        :param string name: name of the missing class attribute
        :return: :meth:`fromcode` bound to the converter named after the ``from`` prefix
        :raise: AttributeError for any other missing attribute

        """
        if name.startswith('from'):
            return partial(cls.fromcode, converter=name[4:])
        # Calling getattr(cls, name) here would re-enter this method and
        # recurse without end, so report the attribute as missing instead.
        raise AttributeError('type object %r has no attribute %r' % (cls.__name__, name))
|
||||
|
||||
|
||||
class Language(LanguageMeta(str('LanguageBase'), (object,), {})):
|
||||
"""A human language
|
||||
|
||||
A human language is composed of a language part following the ISO-639
|
||||
standard and can be country-specific when a :class:`~babelfish.country.Country`
|
||||
is specified.
|
||||
|
||||
The :class:`Language` is extensible with custom converters (see :ref:`custom_converters`)
|
||||
|
||||
:param string language: the language as a 3-letter ISO-639-3 code
|
||||
:param country: the country (if any) as a 2-letter ISO-3166 code or :class:`~babelfish.country.Country` instance
|
||||
:type country: string or :class:`~babelfish.country.Country` or None
|
||||
:param script: the script (if any) as a 4-letter ISO-15924 code or :class:`~babelfish.script.Script` instance
|
||||
:type script: string or :class:`~babelfish.script.Script` or None
|
||||
:param unknown: the unknown language as a three-letters ISO-639-3 code to use as fallback
|
||||
:type unknown: string or None
|
||||
:raise: ValueError if the language could not be recognized and `unknown` is ``None``
|
||||
|
||||
"""
|
||||
def __init__(self, language, country=None, script=None, unknown=None):
|
||||
if unknown is not None and language not in LANGUAGES:
|
||||
language = unknown
|
||||
if language not in LANGUAGES:
|
||||
raise ValueError('%r is not a valid language' % language)
|
||||
self.alpha3 = language
|
||||
self.country = None
|
||||
if isinstance(country, Country):
|
||||
self.country = country
|
||||
elif country is None:
|
||||
self.country = None
|
||||
else:
|
||||
self.country = Country(country)
|
||||
self.script = None
|
||||
if isinstance(script, Script):
|
||||
self.script = script
|
||||
elif script is None:
|
||||
self.script = None
|
||||
else:
|
||||
self.script = Script(script)
|
||||
|
||||
@classmethod
|
||||
def fromcode(cls, code, converter):
|
||||
"""Create a :class:`Language` by its `code` using `converter` to
|
||||
:meth:`~babelfish.converters.LanguageReverseConverter.reverse` it
|
||||
|
||||
:param string code: the code to reverse
|
||||
:param string converter: name of the :class:`~babelfish.converters.LanguageReverseConverter` to use
|
||||
:return: the corresponding :class:`Language` instance
|
||||
:rtype: :class:`Language`
|
||||
|
||||
"""
|
||||
return cls(*language_converters[converter].reverse(code))
|
||||
|
||||
@classmethod
|
||||
def fromietf(cls, ietf):
|
||||
"""Create a :class:`Language` by from an IETF language code
|
||||
|
||||
:param string ietf: the ietf code
|
||||
:return: the corresponding :class:`Language` instance
|
||||
:rtype: :class:`Language`
|
||||
|
||||
"""
|
||||
subtags = ietf.split('-')
|
||||
language_subtag = subtags.pop(0).lower()
|
||||
if len(language_subtag) == 2:
|
||||
language = cls.fromalpha2(language_subtag)
|
||||
else:
|
||||
language = cls(language_subtag)
|
||||
while subtags:
|
||||
subtag = subtags.pop(0)
|
||||
if len(subtag) == 2:
|
||||
language.country = Country(subtag.upper())
|
||||
else:
|
||||
language.script = Script(subtag.capitalize())
|
||||
if language.script is not None:
|
||||
if subtags:
|
||||
raise ValueError('Wrong IETF format. Unmatched subtags: %r' % subtags)
|
||||
break
|
||||
return language
|
||||
|
||||
def __getattr__(self, name):
|
||||
alpha3 = self.alpha3
|
||||
country = self.country.alpha2 if self.country is not None else None
|
||||
script = self.script.code if self.script is not None else None
|
||||
try:
|
||||
return language_converters[name].convert(alpha3, country, script)
|
||||
except KeyError:
|
||||
raise AttributeError(name)
|
||||
|
||||
def __hash__(self):
|
||||
return hash(str(self))
|
||||
|
||||
def __eq__(self, other):
|
||||
if other is None:
|
||||
return False
|
||||
return self.alpha3 == other.alpha3 and self.country == other.country and self.script == other.script
|
||||
|
||||
def __ne__(self, other):
|
||||
return not self == other
|
||||
|
||||
def __bool__(self):
|
||||
return self.alpha3 != 'und'
|
||||
__nonzero__ = __bool__
|
||||
|
||||
def __repr__(self):
|
||||
return '<Language [%s]>' % self
|
||||
|
||||
def __str__(self):
    """String form of the language: ``code[-country][-script]``

    The code is the alpha2 code, or the alpha3 code when reading ``alpha2``
    raises :class:`LanguageConvertError`. Country and script are appended,
    in that order, when they are not None.

    """
    try:
        pieces = [self.alpha2]
    except LanguageConvertError:
        pieces = [self.alpha3]
    for extra in (self.country, self.script):
        if extra is not None:
            pieces.append(str(extra))
    return '-'.join(pieces)
|
66
lib/babelfish/script.py
Normal file
66
lib/babelfish/script.py
Normal file
|
@ -0,0 +1,66 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from collections import namedtuple
|
||||
from pkg_resources import resource_stream # @UnresolvedImport
|
||||
|
||||
|
||||
#: Script code to script name mapping
SCRIPTS = {}

#: List of scripts in the ISO-15924 as namedtuple of code, number, name, french_name, pva and date
SCRIPT_MATRIX = []

#: The namedtuple used in the :data:`SCRIPT_MATRIX`
IsoScript = namedtuple('IsoScript', ['code', 'number', 'name', 'french_name', 'pva', 'date'])

# Fill SCRIPTS and SCRIPT_MATRIX from the bundled data file at import time.
f = resource_stream('babelfish', 'data/iso15924-utf8-20131012.txt')
try:
    f.readline()  # the first line is always discarded
    for l in f:
        l = l.decode('utf-8').strip()
        # blank lines and '#' comment lines carry no record
        if not l or l.startswith('#'):
            continue
        script = IsoScript._make(l.split(';'))
        SCRIPT_MATRIX.append(script)
        SCRIPTS[script.code] = script.name
finally:
    # close the stream even if a malformed line makes IsoScript._make raise
    f.close()
|
||||
|
||||
|
||||
class Script(object):
    """A human writing system

    A script is represented by a 4-letter code from the ISO-15924 standard

    :param string script: 4-letter ISO-15924 script code
    :raise ValueError: if `script` is not a key of :data:`SCRIPTS`

    """
    def __init__(self, script):
        if script not in SCRIPTS:
            # wrap in a 1-tuple so that a tuple argument is formatted, not unpacked
            raise ValueError('%r is not a valid script' % (script,))

        #: ISO-15924 4-letter script code
        self.code = script

    @property
    def name(self):
        """English name of the script"""
        return SCRIPTS[self.code]

    def __hash__(self):
        return hash(self.code)

    def __eq__(self, other):
        # None and other objects without a ``code`` attribute are not comparable;
        # returning NotImplemented lets ``==`` evaluate to False instead of
        # raising AttributeError (e.g. when comparing against a missing script)
        if not hasattr(other, 'code'):
            return NotImplemented
        return self.code == other.code

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        return '<Script [%s]>' % self

    def __str__(self):
        return self.code
|
353
lib/babelfish/tests.py
Normal file
353
lib/babelfish/tests.py
Normal file
|
@ -0,0 +1,353 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
import re
|
||||
import sys
|
||||
from unittest import TestCase, TestSuite, TestLoader, TextTestRunner
|
||||
from pkg_resources import resource_stream # @UnresolvedImport
|
||||
from babelfish import (LANGUAGES, Language, Country, Script, language_converters, country_converters,
|
||||
LanguageReverseConverter, LanguageConvertError, LanguageReverseError, CountryReverseError)
|
||||
|
||||
|
||||
if sys.version_info[:2] <= (2, 6):
    # These helpers are only defined on Python 2.6 and older.
    _MAX_LENGTH = 80

    def safe_repr(obj, short=False):
        """Return ``repr(obj)``, or the default object repr if that raises.

        With `short` set, a result of ``_MAX_LENGTH`` characters or more is truncated.
        """
        try:
            text = repr(obj)
        except Exception:
            text = object.__repr__(obj)
        if short and len(text) >= _MAX_LENGTH:
            return text[:_MAX_LENGTH] + ' [truncated]...'
        return text

    class _AssertRaisesContext(object):
        """A context manager used to implement TestCase.assertRaises* methods."""

        def __init__(self, expected, test_case, expected_regexp=None):
            self.expected = expected
            self.failureException = test_case.failureException
            self.expected_regexp = expected_regexp

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc_value, tb):
            if exc_type is None:
                # the ``with`` body finished without raising anything
                try:
                    expected_name = self.expected.__name__
                except AttributeError:
                    expected_name = str(self.expected)
                raise self.failureException("{0} not raised".format(expected_name))

            if not issubclass(exc_type, self.expected):
                # let unexpected exceptions pass through
                return False

            self.exception = exc_value  # store for later retrieval
            pattern = self.expected_regexp
            if pattern is None:
                return True
            if isinstance(pattern, basestring):
                pattern = re.compile(pattern)
            message = str(exc_value)
            if pattern.search(message):
                return True
            raise self.failureException('"%s" does not match "%s"' % (pattern.pattern, message))

    class _Py26FixTestCase(object):
        """Mixin providing the assertion helpers used by the test cases below."""

        def assertIsNone(self, obj, msg=None):
            """Same as self.assertTrue(obj is None), with a nicer default message."""
            if obj is None:
                return
            default_msg = '%s is not None' % (safe_repr(obj),)
            self.fail(self._formatMessage(msg, default_msg))

        def assertIsNotNone(self, obj, msg=None):
            """Included for symmetry with assertIsNone."""
            if obj is not None:
                return
            default_msg = 'unexpectedly None'
            self.fail(self._formatMessage(msg, default_msg))

        def assertIn(self, member, container, msg=None):
            """Just like self.assertTrue(a in b), but with a nicer default message."""
            if member in container:
                return
            default_msg = '%s not found in %s' % (safe_repr(member), safe_repr(container))
            self.fail(self._formatMessage(msg, default_msg))

        def assertNotIn(self, member, container, msg=None):
            """Just like self.assertTrue(a not in b), but with a nicer default message."""
            if member not in container:
                return
            default_msg = '%s unexpectedly found in %s' % (safe_repr(member), safe_repr(container))
            self.fail(self._formatMessage(msg, default_msg))

        def assertIs(self, expr1, expr2, msg=None):
            """Just like self.assertTrue(a is b), but with a nicer default message."""
            if expr1 is expr2:
                return
            default_msg = '%s is not %s' % (safe_repr(expr1), safe_repr(expr2))
            self.fail(self._formatMessage(msg, default_msg))

        def assertIsNot(self, expr1, expr2, msg=None):
            """Just like self.assertTrue(a is not b), but with a nicer default message."""
            if expr1 is not expr2:
                return
            default_msg = 'unexpectedly identical: %s' % (safe_repr(expr1),)
            self.fail(self._formatMessage(msg, default_msg))

else:
    class _Py26FixTestCase(object):
        """Empty mixin: nothing is added on Python newer than 2.6."""
        pass
|
||||
|
||||
|
||||
class TestScript(TestCase, _Py26FixTestCase):
    """Construction, comparison and hashing of :class:`Script`."""

    def test_wrong_script(self):
        # 'Azer' is expected to be rejected by the constructor
        self.assertRaises(ValueError, Script, 'Azer')

    def test_eq(self):
        first, second = Script('Latn'), Script('Latn')
        self.assertEqual(first, second)

    def test_ne(self):
        cyrillic, latin = Script('Cyrl'), Script('Latn')
        self.assertNotEqual(cyrillic, latin)

    def test_hash(self):
        # a script hashes like its code
        code = 'Hira'
        self.assertEqual(hash(Script(code)), hash(code))
|
||||
|
||||
|
||||
class TestCountry(TestCase, _Py26FixTestCase):
    """Construction, comparison, hashing and the name converter of :class:`Country`."""

    def test_wrong_country(self):
        # 'ZZ' is expected to be rejected by the constructor
        self.assertRaises(ValueError, Country, 'ZZ')

    def test_eq(self):
        first, second = Country('US'), Country('US')
        self.assertEqual(first, second)

    def test_ne(self):
        british, american = Country('GB'), Country('US')
        self.assertNotEqual(british, american)
        self.assertIsNotNone(Country('US'))

    def test_hash(self):
        # a country hashes like its 2-letter code
        code = 'US'
        self.assertEqual(hash(Country(code)), hash(code))

    def test_converter_name(self):
        usa = Country('US')
        self.assertEqual(Country('US').name, 'UNITED STATES')
        self.assertEqual(Country.fromname('UNITED STATES'), usa)
        self.assertEqual(Country.fromcode('UNITED STATES', 'name'), usa)
        self.assertRaises(CountryReverseError, lambda: Country.fromname('ZZZZZ'))
        known_names = country_converters['name'].codes
        self.assertEqual(len(known_names), 249)
|
||||
|
||||
|
||||
class TestLanguage(TestCase, _Py26FixTestCase):
    """Tests for :class:`Language`: converters, IETF parsing, comparison, hashing and string form."""

    def test_languages(self):
        self.assertEqual(len(LANGUAGES), 7874)

    def test_wrong_language(self):
        self.assertRaises(ValueError, lambda: Language('zzz'))

    def test_unknown_language(self):
        self.assertEqual(Language('zzzz', unknown='und'), Language('und'))

    def test_converter_alpha2(self):
        self.assertEqual(Language('eng').alpha2, 'en')
        self.assertEqual(Language.fromalpha2('en'), Language('eng'))
        self.assertEqual(Language.fromcode('en', 'alpha2'), Language('eng'))
        self.assertRaises(LanguageReverseError, lambda: Language.fromalpha2('zz'))
        self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha2)
        self.assertEqual(len(language_converters['alpha2'].codes), 184)

    def test_converter_alpha3b(self):
        self.assertEqual(Language('fra').alpha3b, 'fre')
        self.assertEqual(Language.fromalpha3b('fre'), Language('fra'))
        self.assertEqual(Language.fromcode('fre', 'alpha3b'), Language('fra'))
        self.assertRaises(LanguageReverseError, lambda: Language.fromalpha3b('zzz'))
        self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha3b)
        self.assertEqual(len(language_converters['alpha3b'].codes), 418)

    def test_converter_alpha3t(self):
        self.assertEqual(Language('fra').alpha3t, 'fra')
        self.assertEqual(Language.fromalpha3t('fra'), Language('fra'))
        self.assertEqual(Language.fromcode('fra', 'alpha3t'), Language('fra'))
        self.assertRaises(LanguageReverseError, lambda: Language.fromalpha3t('zzz'))
        self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha3t)
        self.assertEqual(len(language_converters['alpha3t'].codes), 418)

    def test_converter_name(self):
        self.assertEqual(Language('eng').name, 'English')
        self.assertEqual(Language.fromname('English'), Language('eng'))
        self.assertEqual(Language.fromcode('English', 'name'), Language('eng'))
        self.assertRaises(LanguageReverseError, lambda: Language.fromname('Zzzzzzzzz'))
        self.assertEqual(len(language_converters['name'].codes), 7874)

    def test_converter_scope(self):
        self.assertEqual(language_converters['scope'].codes, set(['I', 'S', 'M']))
        self.assertEqual(Language('eng').scope, 'individual')
        self.assertEqual(Language('und').scope, 'special')

    def test_converter_type(self):
        self.assertEqual(language_converters['type'].codes, set(['A', 'C', 'E', 'H', 'L', 'S']))
        self.assertEqual(Language('eng').type, 'living')
        self.assertEqual(Language('und').type, 'special')

    def test_converter_opensubtitles(self):
        self.assertEqual(Language('fra').opensubtitles, Language('fra').alpha3b)
        self.assertEqual(Language('por', 'BR').opensubtitles, 'pob')
        self.assertEqual(Language.fromopensubtitles('fre'), Language('fra'))
        self.assertEqual(Language.fromopensubtitles('pob'), Language('por', 'BR'))
        self.assertEqual(Language.fromopensubtitles('pb'), Language('por', 'BR'))
        # Montenegrin is not recognized as an ISO language (yet?) but for now it is
        # unofficially accepted as Serbian from Montenegro
        self.assertEqual(Language.fromopensubtitles('mne'), Language('srp', 'ME'))
        self.assertEqual(Language.fromcode('pob', 'opensubtitles'), Language('por', 'BR'))
        self.assertRaises(LanguageReverseError, lambda: Language.fromopensubtitles('zzz'))
        self.assertRaises(LanguageConvertError, lambda: Language('aaa').opensubtitles)
        self.assertEqual(len(language_converters['opensubtitles'].codes), 606)

        # test with all the LANGUAGES from the opensubtitles api
        # downloaded from: http://www.opensubtitles.org/addons/export_languages.php
        f = resource_stream('babelfish', 'data/opensubtitles_languages.txt')
        try:
            f.readline()
            for l in f:
                idlang, alpha2, _, upload_enabled, web_enabled = l.decode('utf-8').strip().split('\t')
                if not int(upload_enabled) and not int(web_enabled):
                    # do not test LANGUAGES that are too esoteric / not widely available
                    continue
                self.assertEqual(Language.fromopensubtitles(idlang).opensubtitles, idlang)
                if alpha2:
                    self.assertEqual(Language.fromopensubtitles(idlang), Language.fromopensubtitles(alpha2))
        finally:
            # a failing assertion must not leave the data file open
            f.close()

    def test_fromietf_country_script(self):
        language = Language.fromietf('fra-FR-Latn')
        self.assertEqual(language.alpha3, 'fra')
        self.assertEqual(language.country, Country('FR'))
        self.assertEqual(language.script, Script('Latn'))

    def test_fromietf_country_no_script(self):
        language = Language.fromietf('fra-FR')
        self.assertEqual(language.alpha3, 'fra')
        self.assertEqual(language.country, Country('FR'))
        self.assertIsNone(language.script)

    def test_fromietf_no_country_no_script(self):
        # bare language subtag: neither country nor script may be set
        language = Language.fromietf('fra')
        self.assertEqual(language.alpha3, 'fra')
        self.assertIsNone(language.country)
        self.assertIsNone(language.script)

    def test_fromietf_no_country_script(self):
        language = Language.fromietf('fra-Latn')
        self.assertEqual(language.alpha3, 'fra')
        self.assertIsNone(language.country)
        self.assertEqual(language.script, Script('Latn'))

    def test_fromietf_alpha2_language(self):
        language = Language.fromietf('fr-Latn')
        self.assertEqual(language.alpha3, 'fra')
        self.assertIsNone(language.country)
        self.assertEqual(language.script, Script('Latn'))

    def test_fromietf_wrong_language(self):
        self.assertRaises(ValueError, lambda: Language.fromietf('xyz-FR'))

    def test_fromietf_wrong_country(self):
        self.assertRaises(ValueError, lambda: Language.fromietf('fra-YZ'))

    def test_fromietf_wrong_script(self):
        self.assertRaises(ValueError, lambda: Language.fromietf('fra-FR-Wxyz'))

    def test_eq(self):
        self.assertEqual(Language('eng'), Language('eng'))

    def test_ne(self):
        self.assertNotEqual(Language('fra'), Language('eng'))
        self.assertIsNotNone(Language('fra'))

    def test_nonzero(self):
        self.assertFalse(bool(Language('und')))
        self.assertTrue(bool(Language('eng')))

    def test_language_hasattr(self):
        self.assertTrue(hasattr(Language('fra'), 'alpha3'))
        self.assertTrue(hasattr(Language('fra'), 'alpha2'))
        self.assertFalse(hasattr(Language('bej'), 'alpha2'))

    def test_country(self):
        self.assertEqual(Language('por', 'BR').country, Country('BR'))
        self.assertEqual(Language('eng', Country('US')).country, Country('US'))

    def test_eq_with_country(self):
        self.assertEqual(Language('eng', 'US'), Language('eng', Country('US')))

    def test_ne_with_country(self):
        self.assertNotEqual(Language('eng', 'US'), Language('eng', Country('GB')))

    def test_script(self):
        self.assertEqual(Language('srp', script='Latn').script, Script('Latn'))
        self.assertEqual(Language('srp', script=Script('Cyrl')).script, Script('Cyrl'))

    def test_eq_with_script(self):
        self.assertEqual(Language('srp', script='Latn'), Language('srp', script=Script('Latn')))

    def test_ne_with_script(self):
        self.assertNotEqual(Language('srp', script='Latn'), Language('srp', script=Script('Cyrl')))

    def test_eq_with_country_and_script(self):
        self.assertEqual(Language('srp', 'SR', 'Latn'), Language('srp', Country('SR'), Script('Latn')))

    def test_ne_with_country_and_script(self):
        self.assertNotEqual(Language('srp', 'SR', 'Latn'), Language('srp', Country('SR'), Script('Cyrl')))

    def test_hash(self):
        self.assertEqual(hash(Language('fra')), hash('fr'))
        self.assertEqual(hash(Language('ace')), hash('ace'))
        self.assertEqual(hash(Language('por', 'BR')), hash('pt-BR'))
        self.assertEqual(hash(Language('srp', script='Cyrl')), hash('sr-Cyrl'))
        self.assertEqual(hash(Language('eng', 'US', 'Latn')), hash('en-US-Latn'))

    def test_str(self):
        self.assertEqual(Language.fromietf(str(Language('eng', 'US', 'Latn'))), Language('eng', 'US', 'Latn'))
        self.assertEqual(Language.fromietf(str(Language('fra', 'FR'))), Language('fra', 'FR'))
        self.assertEqual(Language.fromietf(str(Language('bel'))), Language('bel'))

    def test_register_converter(self):
        class TestConverter(LanguageReverseConverter):
            def __init__(self):
                self.to_test = {'fra': 'test1', 'eng': 'test2'}
                self.from_test = {'test1': 'fra', 'test2': 'eng'}

            def convert(self, alpha3, country=None, script=None):
                if alpha3 not in self.to_test:
                    raise LanguageConvertError(alpha3, country, script)
                return self.to_test[alpha3]

            def reverse(self, test):
                if test not in self.from_test:
                    raise LanguageReverseError(test)
                return (self.from_test[test], None)

        language = Language('fra')
        self.assertFalse(hasattr(language, 'test'))
        language_converters['test'] = TestConverter()
        try:
            self.assertTrue(hasattr(language, 'test'))
            self.assertIn('test', language_converters)
            self.assertEqual(Language('fra').test, 'test1')
            self.assertEqual(Language.fromtest('test2').alpha3, 'eng')
        finally:
            # always unregister, so a failure above cannot leak the converter into other tests
            del language_converters['test']
        self.assertNotIn('test', language_converters)
        self.assertRaises(KeyError, lambda: Language.fromtest('test1'))
        self.assertRaises(AttributeError, lambda: Language('fra').test)
|
||||
|
||||
|
||||
def suite():
    """Build a :class:`TestSuite` holding the Script, Country and Language test cases, in that order."""
    collected = TestSuite()
    for case in (TestScript, TestCountry, TestLanguage):
        collected.addTest(TestLoader().loadTestsFromTestCase(case))
    return collected
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Run the whole suite and report the outcome through the exit status,
    # so that callers (shell scripts, CI) can detect failing tests.
    result = TextTestRunner().run(suite())
    sys.exit(0 if result.wasSuccessful() else 1)
|
249
lib/guessit/ISO-3166-1_utf8.txt
Normal file
249
lib/guessit/ISO-3166-1_utf8.txt
Normal file
|
@ -0,0 +1,249 @@
|
|||
Afghanistan|AF|AFG|004|ISO 3166-2:AF
|
||||
Åland Islands|AX|ALA|248|ISO 3166-2:AX
|
||||
Albania|AL|ALB|008|ISO 3166-2:AL
|
||||
Algeria|DZ|DZA|012|ISO 3166-2:DZ
|
||||
American Samoa|AS|ASM|016|ISO 3166-2:AS
|
||||
Andorra|AD|AND|020|ISO 3166-2:AD
|
||||
Angola|AO|AGO|024|ISO 3166-2:AO
|
||||
Anguilla|AI|AIA|660|ISO 3166-2:AI
|
||||
Antarctica|AQ|ATA|010|ISO 3166-2:AQ
|
||||
Antigua and Barbuda|AG|ATG|028|ISO 3166-2:AG
|
||||
Argentina|AR|ARG|032|ISO 3166-2:AR
|
||||
Armenia|AM|ARM|051|ISO 3166-2:AM
|
||||
Aruba|AW|ABW|533|ISO 3166-2:AW
|
||||
Australia|AU|AUS|036|ISO 3166-2:AU
|
||||
Austria|AT|AUT|040|ISO 3166-2:AT
|
||||
Azerbaijan|AZ|AZE|031|ISO 3166-2:AZ
|
||||
Bahamas|BS|BHS|044|ISO 3166-2:BS
|
||||
Bahrain|BH|BHR|048|ISO 3166-2:BH
|
||||
Bangladesh|BD|BGD|050|ISO 3166-2:BD
|
||||
Barbados|BB|BRB|052|ISO 3166-2:BB
|
||||
Belarus|BY|BLR|112|ISO 3166-2:BY
|
||||
Belgium|BE|BEL|056|ISO 3166-2:BE
|
||||
Belize|BZ|BLZ|084|ISO 3166-2:BZ
|
||||
Benin|BJ|BEN|204|ISO 3166-2:BJ
|
||||
Bermuda|BM|BMU|060|ISO 3166-2:BM
|
||||
Bhutan|BT|BTN|064|ISO 3166-2:BT
|
||||
Bolivia, Plurinational State of|BO|BOL|068|ISO 3166-2:BO
|
||||
Bonaire, Sint Eustatius and Saba|BQ|BES|535|ISO 3166-2:BQ
|
||||
Bosnia and Herzegovina|BA|BIH|070|ISO 3166-2:BA
|
||||
Botswana|BW|BWA|072|ISO 3166-2:BW
|
||||
Bouvet Island|BV|BVT|074|ISO 3166-2:BV
|
||||
Brazil|BR|BRA|076|ISO 3166-2:BR
|
||||
British Indian Ocean Territory|IO|IOT|086|ISO 3166-2:IO
|
||||
Brunei Darussalam|BN|BRN|096|ISO 3166-2:BN
|
||||
Bulgaria|BG|BGR|100|ISO 3166-2:BG
|
||||
Burkina Faso|BF|BFA|854|ISO 3166-2:BF
|
||||
Burundi|BI|BDI|108|ISO 3166-2:BI
|
||||
Cambodia|KH|KHM|116|ISO 3166-2:KH
|
||||
Cameroon|CM|CMR|120|ISO 3166-2:CM
|
||||
Canada|CA|CAN|124|ISO 3166-2:CA
|
||||
Cape Verde|CV|CPV|132|ISO 3166-2:CV
|
||||
Cayman Islands|KY|CYM|136|ISO 3166-2:KY
|
||||
Central African Republic|CF|CAF|140|ISO 3166-2:CF
|
||||
Chad|TD|TCD|148|ISO 3166-2:TD
|
||||
Chile|CL|CHL|152|ISO 3166-2:CL
|
||||
China|CN|CHN|156|ISO 3166-2:CN
|
||||
Christmas Island|CX|CXR|162|ISO 3166-2:CX
|
||||
Cocos (Keeling) Islands|CC|CCK|166|ISO 3166-2:CC
|
||||
Colombia|CO|COL|170|ISO 3166-2:CO
|
||||
Comoros|KM|COM|174|ISO 3166-2:KM
|
||||
Congo|CG|COG|178|ISO 3166-2:CG
|
||||
Congo, the Democratic Republic of the|CD|COD|180|ISO 3166-2:CD
|
||||
Cook Islands|CK|COK|184|ISO 3166-2:CK
|
||||
Costa Rica|CR|CRI|188|ISO 3166-2:CR
|
||||
Côte d'Ivoire|CI|CIV|384|ISO 3166-2:CI
|
||||
Croatia|HR|HRV|191|ISO 3166-2:HR
|
||||
Cuba|CU|CUB|192|ISO 3166-2:CU
|
||||
Curaçao|CW|CUW|531|ISO 3166-2:CW
|
||||
Cyprus|CY|CYP|196|ISO 3166-2:CY
|
||||
Czech Republic|CZ|CZE|203|ISO 3166-2:CZ
|
||||
Denmark|DK|DNK|208|ISO 3166-2:DK
|
||||
Djibouti|DJ|DJI|262|ISO 3166-2:DJ
|
||||
Dominica|DM|DMA|212|ISO 3166-2:DM
|
||||
Dominican Republic|DO|DOM|214|ISO 3166-2:DO
|
||||
Ecuador|EC|ECU|218|ISO 3166-2:EC
|
||||
Egypt|EG|EGY|818|ISO 3166-2:EG
|
||||
El Salvador|SV|SLV|222|ISO 3166-2:SV
|
||||
Equatorial Guinea|GQ|GNQ|226|ISO 3166-2:GQ
|
||||
Eritrea|ER|ERI|232|ISO 3166-2:ER
|
||||
Estonia|EE|EST|233|ISO 3166-2:EE
|
||||
Ethiopia|ET|ETH|231|ISO 3166-2:ET
|
||||
Falkland Islands (Malvinas)|FK|FLK|238|ISO 3166-2:FK
|
||||
Faroe Islands|FO|FRO|234|ISO 3166-2:FO
|
||||
Fiji|FJ|FJI|242|ISO 3166-2:FJ
|
||||
Finland|FI|FIN|246|ISO 3166-2:FI
|
||||
France|FR|FRA|250|ISO 3166-2:FR
|
||||
French Guiana|GF|GUF|254|ISO 3166-2:GF
|
||||
French Polynesia|PF|PYF|258|ISO 3166-2:PF
|
||||
French Southern Territories|TF|ATF|260|ISO 3166-2:TF
|
||||
Gabon|GA|GAB|266|ISO 3166-2:GA
|
||||
Gambia|GM|GMB|270|ISO 3166-2:GM
|
||||
Georgia|GE|GEO|268|ISO 3166-2:GE
|
||||
Germany|DE|DEU|276|ISO 3166-2:DE
|
||||
Ghana|GH|GHA|288|ISO 3166-2:GH
|
||||
Gibraltar|GI|GIB|292|ISO 3166-2:GI
|
||||
Greece|GR|GRC|300|ISO 3166-2:GR
|
||||
Greenland|GL|GRL|304|ISO 3166-2:GL
|
||||
Grenada|GD|GRD|308|ISO 3166-2:GD
|
||||
Guadeloupe|GP|GLP|312|ISO 3166-2:GP
|
||||
Guam|GU|GUM|316|ISO 3166-2:GU
|
||||
Guatemala|GT|GTM|320|ISO 3166-2:GT
|
||||
Guernsey|GG|GGY|831|ISO 3166-2:GG
|
||||
Guinea|GN|GIN|324|ISO 3166-2:GN
|
||||
Guinea-Bissau|GW|GNB|624|ISO 3166-2:GW
|
||||
Guyana|GY|GUY|328|ISO 3166-2:GY
|
||||
Haiti|HT|HTI|332|ISO 3166-2:HT
|
||||
Heard Island and McDonald Islands|HM|HMD|334|ISO 3166-2:HM
|
||||
Holy See (Vatican City State)|VA|VAT|336|ISO 3166-2:VA
|
||||
Honduras|HN|HND|340|ISO 3166-2:HN
|
||||
Hong Kong|HK|HKG|344|ISO 3166-2:HK
|
||||
Hungary|HU|HUN|348|ISO 3166-2:HU
|
||||
Iceland|IS|ISL|352|ISO 3166-2:IS
|
||||
India|IN|IND|356|ISO 3166-2:IN
|
||||
Indonesia|ID|IDN|360|ISO 3166-2:ID
|
||||
Iran, Islamic Republic of|IR|IRN|364|ISO 3166-2:IR
|
||||
Iraq|IQ|IRQ|368|ISO 3166-2:IQ
|
||||
Ireland|IE|IRL|372|ISO 3166-2:IE
|
||||
Isle of Man|IM|IMN|833|ISO 3166-2:IM
|
||||
Israel|IL|ISR|376|ISO 3166-2:IL
|
||||
Italy|IT|ITA|380|ISO 3166-2:IT
|
||||
Jamaica|JM|JAM|388|ISO 3166-2:JM
|
||||
Japan|JP|JPN|392|ISO 3166-2:JP
|
||||
Jersey|JE|JEY|832|ISO 3166-2:JE
|
||||
Jordan|JO|JOR|400|ISO 3166-2:JO
|
||||
Kazakhstan|KZ|KAZ|398|ISO 3166-2:KZ
|
||||
Kenya|KE|KEN|404|ISO 3166-2:KE
|
||||
Kiribati|KI|KIR|296|ISO 3166-2:KI
|
||||
Korea, Democratic People's Republic of|KP|PRK|408|ISO 3166-2:KP
|
||||
Korea, Republic of|KR|KOR|410|ISO 3166-2:KR
|
||||
Kuwait|KW|KWT|414|ISO 3166-2:KW
|
||||
Kyrgyzstan|KG|KGZ|417|ISO 3166-2:KG
|
||||
Lao People's Democratic Republic|LA|LAO|418|ISO 3166-2:LA
|
||||
Latvia|LV|LVA|428|ISO 3166-2:LV
|
||||
Lebanon|LB|LBN|422|ISO 3166-2:LB
|
||||
Lesotho|LS|LSO|426|ISO 3166-2:LS
|
||||
Liberia|LR|LBR|430|ISO 3166-2:LR
|
||||
Libya|LY|LBY|434|ISO 3166-2:LY
|
||||
Liechtenstein|LI|LIE|438|ISO 3166-2:LI
|
||||
Lithuania|LT|LTU|440|ISO 3166-2:LT
|
||||
Luxembourg|LU|LUX|442|ISO 3166-2:LU
|
||||
Macao|MO|MAC|446|ISO 3166-2:MO
|
||||
Macedonia, the former Yugoslav Republic of|MK|MKD|807|ISO 3166-2:MK
|
||||
Madagascar|MG|MDG|450|ISO 3166-2:MG
|
||||
Malawi|MW|MWI|454|ISO 3166-2:MW
|
||||
Malaysia|MY|MYS|458|ISO 3166-2:MY
|
||||
Maldives|MV|MDV|462|ISO 3166-2:MV
|
||||
Mali|ML|MLI|466|ISO 3166-2:ML
|
||||
Malta|MT|MLT|470|ISO 3166-2:MT
|
||||
Marshall Islands|MH|MHL|584|ISO 3166-2:MH
|
||||
Martinique|MQ|MTQ|474|ISO 3166-2:MQ
|
||||
Mauritania|MR|MRT|478|ISO 3166-2:MR
|
||||
Mauritius|MU|MUS|480|ISO 3166-2:MU
|
||||
Mayotte|YT|MYT|175|ISO 3166-2:YT
|
||||
Mexico|MX|MEX|484|ISO 3166-2:MX
|
||||
Micronesia, Federated States of|FM|FSM|583|ISO 3166-2:FM
|
||||
Moldova, Republic of|MD|MDA|498|ISO 3166-2:MD
|
||||
Monaco|MC|MCO|492|ISO 3166-2:MC
|
||||
Mongolia|MN|MNG|496|ISO 3166-2:MN
|
||||
Montenegro|ME|MNE|499|ISO 3166-2:ME
|
||||
Montserrat|MS|MSR|500|ISO 3166-2:MS
|
||||
Morocco|MA|MAR|504|ISO 3166-2:MA
|
||||
Mozambique|MZ|MOZ|508|ISO 3166-2:MZ
|
||||
Myanmar|MM|MMR|104|ISO 3166-2:MM
|
||||
Namibia|NA|NAM|516|ISO 3166-2:NA
|
||||
Nauru|NR|NRU|520|ISO 3166-2:NR
|
||||
Nepal|NP|NPL|524|ISO 3166-2:NP
|
||||
Netherlands|NL|NLD|528|ISO 3166-2:NL
|
||||
New Caledonia|NC|NCL|540|ISO 3166-2:NC
|
||||
New Zealand|NZ|NZL|554|ISO 3166-2:NZ
|
||||
Nicaragua|NI|NIC|558|ISO 3166-2:NI
|
||||
Niger|NE|NER|562|ISO 3166-2:NE
|
||||
Nigeria|NG|NGA|566|ISO 3166-2:NG
|
||||
Niue|NU|NIU|570|ISO 3166-2:NU
|
||||
Norfolk Island|NF|NFK|574|ISO 3166-2:NF
|
||||
Northern Mariana Islands|MP|MNP|580|ISO 3166-2:MP
|
||||
Norway|NO|NOR|578|ISO 3166-2:NO
|
||||
Oman|OM|OMN|512|ISO 3166-2:OM
|
||||
Pakistan|PK|PAK|586|ISO 3166-2:PK
|
||||
Palau|PW|PLW|585|ISO 3166-2:PW
|
||||
Palestinian Territory, Occupied|PS|PSE|275|ISO 3166-2:PS
|
||||
Panama|PA|PAN|591|ISO 3166-2:PA
|
||||
Papua New Guinea|PG|PNG|598|ISO 3166-2:PG
|
||||
Paraguay|PY|PRY|600|ISO 3166-2:PY
|
||||
Peru|PE|PER|604|ISO 3166-2:PE
|
||||
Philippines|PH|PHL|608|ISO 3166-2:PH
|
||||
Pitcairn|PN|PCN|612|ISO 3166-2:PN
|
||||
Poland|PL|POL|616|ISO 3166-2:PL
|
||||
Portugal|PT|PRT|620|ISO 3166-2:PT
|
||||
Puerto Rico|PR|PRI|630|ISO 3166-2:PR
|
||||
Qatar|QA|QAT|634|ISO 3166-2:QA
|
||||
Réunion|RE|REU|638|ISO 3166-2:RE
|
||||
Romania|RO|ROU|642|ISO 3166-2:RO
|
||||
Russian Federation|RU|RUS|643|ISO 3166-2:RU
|
||||
Rwanda|RW|RWA|646|ISO 3166-2:RW
|
||||
Saint Barthélemy|BL|BLM|652|ISO 3166-2:BL
|
||||
Saint Helena, Ascension and Tristan da Cunha|SH|SHN|654|ISO 3166-2:SH
|
||||
Saint Kitts and Nevis|KN|KNA|659|ISO 3166-2:KN
|
||||
Saint Lucia|LC|LCA|662|ISO 3166-2:LC
|
||||
Saint Martin (French part)|MF|MAF|663|ISO 3166-2:MF
|
||||
Saint Pierre and Miquelon|PM|SPM|666|ISO 3166-2:PM
|
||||
Saint Vincent and the Grenadines|VC|VCT|670|ISO 3166-2:VC
|
||||
Samoa|WS|WSM|882|ISO 3166-2:WS
|
||||
San Marino|SM|SMR|674|ISO 3166-2:SM
|
||||
Sao Tome and Principe|ST|STP|678|ISO 3166-2:ST
|
||||
Saudi Arabia|SA|SAU|682|ISO 3166-2:SA
|
||||
Senegal|SN|SEN|686|ISO 3166-2:SN
|
||||
Serbia|RS|SRB|688|ISO 3166-2:RS
|
||||
Seychelles|SC|SYC|690|ISO 3166-2:SC
|
||||
Sierra Leone|SL|SLE|694|ISO 3166-2:SL
|
||||
Singapore|SG|SGP|702|ISO 3166-2:SG
|
||||
Sint Maarten (Dutch part)|SX|SXM|534|ISO 3166-2:SX
|
||||
Slovakia|SK|SVK|703|ISO 3166-2:SK
|
||||
Slovenia|SI|SVN|705|ISO 3166-2:SI
|
||||
Solomon Islands|SB|SLB|090|ISO 3166-2:SB
|
||||
Somalia|SO|SOM|706|ISO 3166-2:SO
|
||||
South Africa|ZA|ZAF|710|ISO 3166-2:ZA
|
||||
South Georgia and the South Sandwich Islands|GS|SGS|239|ISO 3166-2:GS
|
||||
South Sudan|SS|SSD|728|ISO 3166-2:SS
|
||||
Spain|ES|ESP|724|ISO 3166-2:ES
|
||||
Sri Lanka|LK|LKA|144|ISO 3166-2:LK
|
||||
Sudan|SD|SDN|729|ISO 3166-2:SD
|
||||
Suriname|SR|SUR|740|ISO 3166-2:SR
|
||||
Svalbard and Jan Mayen|SJ|SJM|744|ISO 3166-2:SJ
|
||||
Swaziland|SZ|SWZ|748|ISO 3166-2:SZ
|
||||
Sweden|SE|SWE|752|ISO 3166-2:SE
|
||||
Switzerland|CH|CHE|756|ISO 3166-2:CH
|
||||
Syrian Arab Republic|SY|SYR|760|ISO 3166-2:SY
|
||||
Taiwan, Province of China|TW|TWN|158|ISO 3166-2:TW
|
||||
Tajikistan|TJ|TJK|762|ISO 3166-2:TJ
|
||||
Tanzania, United Republic of|TZ|TZA|834|ISO 3166-2:TZ
|
||||
Thailand|TH|THA|764|ISO 3166-2:TH
|
||||
Timor-Leste|TL|TLS|626|ISO 3166-2:TL
|
||||
Togo|TG|TGO|768|ISO 3166-2:TG
|
||||
Tokelau|TK|TKL|772|ISO 3166-2:TK
|
||||
Tonga|TO|TON|776|ISO 3166-2:TO
|
||||
Trinidad and Tobago|TT|TTO|780|ISO 3166-2:TT
|
||||
Tunisia|TN|TUN|788|ISO 3166-2:TN
|
||||
Turkey|TR|TUR|792|ISO 3166-2:TR
|
||||
Turkmenistan|TM|TKM|795|ISO 3166-2:TM
|
||||
Turks and Caicos Islands|TC|TCA|796|ISO 3166-2:TC
|
||||
Tuvalu|TV|TUV|798|ISO 3166-2:TV
|
||||
Uganda|UG|UGA|800|ISO 3166-2:UG
|
||||
Ukraine|UA|UKR|804|ISO 3166-2:UA
|
||||
United Arab Emirates|AE|ARE|784|ISO 3166-2:AE
|
||||
United Kingdom|GB|GBR|826|ISO 3166-2:GB
|
||||
United States|US|USA|840|ISO 3166-2:US
|
||||
United States Minor Outlying Islands|UM|UMI|581|ISO 3166-2:UM
|
||||
Uruguay|UY|URY|858|ISO 3166-2:UY
|
||||
Uzbekistan|UZ|UZB|860|ISO 3166-2:UZ
|
||||
Vanuatu|VU|VUT|548|ISO 3166-2:VU
|
||||
Venezuela, Bolivarian Republic of|VE|VEN|862|ISO 3166-2:VE
|
||||
Viet Nam|VN|VNM|704|ISO 3166-2:VN
|
||||
Virgin Islands, British|VG|VGB|092|ISO 3166-2:VG
|
||||
Virgin Islands, U.S.|VI|VIR|850|ISO 3166-2:VI
|
||||
Wallis and Futuna|WF|WLF|876|ISO 3166-2:WF
|
||||
Western Sahara|EH|ESH|732|ISO 3166-2:EH
|
||||
Yemen|YE|YEM|887|ISO 3166-2:YE
|
||||
Zambia|ZM|ZMB|894|ISO 3166-2:ZM
|
||||
Zimbabwe|ZW|ZWE|716|ISO 3166-2:ZW
|
485
lib/guessit/ISO-639-2_utf-8.txt
Normal file
485
lib/guessit/ISO-639-2_utf-8.txt
Normal file
|
@ -0,0 +1,485 @@
|
|||
aar||aa|Afar|afar
|
||||
abk||ab|Abkhazian|abkhaze
|
||||
ace|||Achinese|aceh
|
||||
ach|||Acoli|acoli
|
||||
ada|||Adangme|adangme
|
||||
ady|||Adyghe; Adygei|adyghé
|
||||
afa|||Afro-Asiatic languages|afro-asiatiques, langues
|
||||
afh|||Afrihili|afrihili
|
||||
afr||af|Afrikaans|afrikaans
|
||||
ain|||Ainu|aïnou
|
||||
aka||ak|Akan|akan
|
||||
akk|||Akkadian|akkadien
|
||||
alb|sqi|sq|Albanian|albanais
|
||||
ale|||Aleut|aléoute
|
||||
alg|||Algonquian languages|algonquines, langues
|
||||
alt|||Southern Altai|altai du Sud
|
||||
amh||am|Amharic|amharique
|
||||
ang|||English, Old (ca.450-1100)|anglo-saxon (ca.450-1100)
|
||||
anp|||Angika|angika
|
||||
apa|||Apache languages|apaches, langues
|
||||
ara||ar|Arabic|arabe
|
||||
arc|||Official Aramaic (700-300 BCE); Imperial Aramaic (700-300 BCE)|araméen d'empire (700-300 BCE)
|
||||
arg||an|Aragonese|aragonais
|
||||
arm|hye|hy|Armenian|arménien
|
||||
arn|||Mapudungun; Mapuche|mapudungun; mapuche; mapuce
|
||||
arp|||Arapaho|arapaho
|
||||
art|||Artificial languages|artificielles, langues
|
||||
arw|||Arawak|arawak
|
||||
asm||as|Assamese|assamais
|
||||
ast|||Asturian; Bable; Leonese; Asturleonese|asturien; bable; léonais; asturoléonais
|
||||
ath|||Athapascan languages|athapascanes, langues
|
||||
aus|||Australian languages|australiennes, langues
|
||||
ava||av|Avaric|avar
|
||||
ave||ae|Avestan|avestique
|
||||
awa|||Awadhi|awadhi
|
||||
aym||ay|Aymara|aymara
|
||||
aze||az|Azerbaijani|azéri
|
||||
bad|||Banda languages|banda, langues
|
||||
bai|||Bamileke languages|bamiléké, langues
|
||||
bak||ba|Bashkir|bachkir
|
||||
bal|||Baluchi|baloutchi
|
||||
bam||bm|Bambara|bambara
|
||||
ban|||Balinese|balinais
|
||||
baq|eus|eu|Basque|basque
|
||||
bas|||Basa|basa
|
||||
bat|||Baltic languages|baltes, langues
|
||||
bej|||Beja; Bedawiyet|bedja
|
||||
bel||be|Belarusian|biélorusse
|
||||
bem|||Bemba|bemba
|
||||
ben||bn|Bengali|bengali
|
||||
ber|||Berber languages|berbères, langues
|
||||
bho|||Bhojpuri|bhojpuri
|
||||
bih||bh|Bihari languages|langues biharis
|
||||
bik|||Bikol|bikol
|
||||
bin|||Bini; Edo|bini; edo
|
||||
bis||bi|Bislama|bichlamar
|
||||
bla|||Siksika|blackfoot
|
||||
bnt|||Bantu (Other)|bantoues, autres langues
|
||||
bos||bs|Bosnian|bosniaque
|
||||
bra|||Braj|braj
|
||||
bre||br|Breton|breton
|
||||
btk|||Batak languages|batak, langues
|
||||
bua|||Buriat|bouriate
|
||||
bug|||Buginese|bugi
|
||||
bul||bg|Bulgarian|bulgare
|
||||
bur|mya|my|Burmese|birman
|
||||
byn|||Blin; Bilin|blin; bilen
|
||||
cad|||Caddo|caddo
|
||||
cai|||Central American Indian languages|amérindiennes de L'Amérique centrale, langues
|
||||
car|||Galibi Carib|karib; galibi; carib
|
||||
cat||ca|Catalan; Valencian|catalan; valencien
|
||||
cau|||Caucasian languages|caucasiennes, langues
|
||||
ceb|||Cebuano|cebuano
|
||||
cel|||Celtic languages|celtiques, langues; celtes, langues
|
||||
cha||ch|Chamorro|chamorro
|
||||
chb|||Chibcha|chibcha
|
||||
che||ce|Chechen|tchétchène
|
||||
chg|||Chagatai|djaghataï
|
||||
chi|zho|zh|Chinese|chinois
|
||||
chk|||Chuukese|chuuk
|
||||
chm|||Mari|mari
|
||||
chn|||Chinook jargon|chinook, jargon
|
||||
cho|||Choctaw|choctaw
|
||||
chp|||Chipewyan; Dene Suline|chipewyan
|
||||
chr|||Cherokee|cherokee
|
||||
chu||cu|Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic|slavon d'église; vieux slave; slavon liturgique; vieux bulgare
|
||||
chv||cv|Chuvash|tchouvache
|
||||
chy|||Cheyenne|cheyenne
|
||||
cmc|||Chamic languages|chames, langues
|
||||
cop|||Coptic|copte
|
||||
cor||kw|Cornish|cornique
|
||||
cos||co|Corsican|corse
|
||||
cpe|||Creoles and pidgins, English based|créoles et pidgins basés sur l'anglais
|
||||
cpf|||Creoles and pidgins, French-based |créoles et pidgins basés sur le français
|
||||
cpp|||Creoles and pidgins, Portuguese-based |créoles et pidgins basés sur le portugais
|
||||
cre||cr|Cree|cree
|
||||
crh|||Crimean Tatar; Crimean Turkish|tatar de Crimé
|
||||
crp|||Creoles and pidgins |créoles et pidgins
|
||||
csb|||Kashubian|kachoube
|
||||
cus|||Cushitic languages|couchitiques, langues
|
||||
cze|ces|cs|Czech|tchèque
|
||||
dak|||Dakota|dakota
|
||||
dan||da|Danish|danois
|
||||
dar|||Dargwa|dargwa
|
||||
day|||Land Dayak languages|dayak, langues
|
||||
del|||Delaware|delaware
|
||||
den|||Slave (Athapascan)|esclave (athapascan)
|
||||
dgr|||Dogrib|dogrib
|
||||
din|||Dinka|dinka
|
||||
div||dv|Divehi; Dhivehi; Maldivian|maldivien
|
||||
doi|||Dogri|dogri
|
||||
dra|||Dravidian languages|dravidiennes, langues
|
||||
dsb|||Lower Sorbian|bas-sorabe
|
||||
dua|||Duala|douala
|
||||
dum|||Dutch, Middle (ca.1050-1350)|néerlandais moyen (ca. 1050-1350)
|
||||
dut|nld|nl|Dutch; Flemish|néerlandais; flamand
|
||||
dyu|||Dyula|dioula
|
||||
dzo||dz|Dzongkha|dzongkha
|
||||
efi|||Efik|efik
|
||||
egy|||Egyptian (Ancient)|égyptien
|
||||
eka|||Ekajuk|ekajuk
|
||||
elx|||Elamite|élamite
|
||||
eng||en|English|anglais
|
||||
enm|||English, Middle (1100-1500)|anglais moyen (1100-1500)
|
||||
epo||eo|Esperanto|espéranto
|
||||
est||et|Estonian|estonien
|
||||
ewe||ee|Ewe|éwé
|
||||
ewo|||Ewondo|éwondo
|
||||
fan|||Fang|fang
|
||||
fao||fo|Faroese|féroïen
|
||||
fat|||Fanti|fanti
|
||||
fij||fj|Fijian|fidjien
|
||||
fil|||Filipino; Pilipino|filipino; pilipino
|
||||
fin||fi|Finnish|finnois
|
||||
fiu|||Finno-Ugrian languages|finno-ougriennes, langues
|
||||
fon|||Fon|fon
|
||||
fre|fra|fr|French|français
|
||||
frm|||French, Middle (ca.1400-1600)|français moyen (1400-1600)
|
||||
fro|||French, Old (842-ca.1400)|français ancien (842-ca.1400)
|
||||
frr|||Northern Frisian|frison septentrional
|
||||
frs|||Eastern Frisian|frison oriental
|
||||
fry||fy|Western Frisian|frison occidental
|
||||
ful||ff|Fulah|peul
|
||||
fur|||Friulian|frioulan
|
||||
gaa|||Ga|ga
|
||||
gay|||Gayo|gayo
|
||||
gba|||Gbaya|gbaya
|
||||
gem|||Germanic languages|germaniques, langues
|
||||
geo|kat|ka|Georgian|géorgien
|
||||
ger|deu|de|German|allemand
|
||||
gez|||Geez|guèze
|
||||
gil|||Gilbertese|kiribati
|
||||
gla||gd|Gaelic; Scottish Gaelic|gaélique; gaélique écossais
|
||||
gle||ga|Irish|irlandais
|
||||
glg||gl|Galician|galicien
|
||||
glv||gv|Manx|manx; mannois
|
||||
gmh|||German, Middle High (ca.1050-1500)|allemand, moyen haut (ca. 1050-1500)
|
||||
goh|||German, Old High (ca.750-1050)|allemand, vieux haut (ca. 750-1050)
|
||||
gon|||Gondi|gond
|
||||
gor|||Gorontalo|gorontalo
|
||||
got|||Gothic|gothique
|
||||
grb|||Grebo|grebo
|
||||
grc|||Greek, Ancient (to 1453)|grec ancien (jusqu'à 1453)
|
||||
gre|ell|el|Greek, Modern (1453-)|grec moderne (après 1453)
|
||||
grn||gn|Guarani|guarani
|
||||
gsw|||Swiss German; Alemannic; Alsatian|suisse alémanique; alémanique; alsacien
|
||||
guj||gu|Gujarati|goudjrati
|
||||
gwi|||Gwich'in|gwich'in
|
||||
hai|||Haida|haida
|
||||
hat||ht|Haitian; Haitian Creole|haïtien; créole haïtien
|
||||
hau||ha|Hausa|haoussa
|
||||
haw|||Hawaiian|hawaïen
|
||||
heb||he|Hebrew|hébreu
|
||||
her||hz|Herero|herero
|
||||
hil|||Hiligaynon|hiligaynon
|
||||
him|||Himachali languages; Western Pahari languages|langues himachalis; langues paharis occidentales
|
||||
hin||hi|Hindi|hindi
|
||||
hit|||Hittite|hittite
|
||||
hmn|||Hmong; Mong|hmong
|
||||
hmo||ho|Hiri Motu|hiri motu
|
||||
hrv||hr|Croatian|croate
|
||||
hsb|||Upper Sorbian|haut-sorabe
|
||||
hun||hu|Hungarian|hongrois
|
||||
hup|||Hupa|hupa
|
||||
iba|||Iban|iban
|
||||
ibo||ig|Igbo|igbo
|
||||
ice|isl|is|Icelandic|islandais
|
||||
ido||io|Ido|ido
|
||||
iii||ii|Sichuan Yi; Nuosu|yi de Sichuan
|
||||
ijo|||Ijo languages|ijo, langues
|
||||
iku||iu|Inuktitut|inuktitut
|
||||
ile||ie|Interlingue; Occidental|interlingue
|
||||
ilo|||Iloko|ilocano
|
||||
ina||ia|Interlingua (International Auxiliary Language Association)|interlingua (langue auxiliaire internationale)
|
||||
inc|||Indic languages|indo-aryennes, langues
|
||||
ind||id|Indonesian|indonésien
|
||||
ine|||Indo-European languages|indo-européennes, langues
|
||||
inh|||Ingush|ingouche
|
||||
ipk||ik|Inupiaq|inupiaq
|
||||
ira|||Iranian languages|iraniennes, langues
|
||||
iro|||Iroquoian languages|iroquoises, langues
|
||||
ita||it|Italian|italien
|
||||
jav||jv|Javanese|javanais
|
||||
jbo|||Lojban|lojban
|
||||
jpn||ja|Japanese|japonais
|
||||
jpr|||Judeo-Persian|judéo-persan
|
||||
jrb|||Judeo-Arabic|judéo-arabe
|
||||
kaa|||Kara-Kalpak|karakalpak
|
||||
kab|||Kabyle|kabyle
|
||||
kac|||Kachin; Jingpho|kachin; jingpho
|
||||
kal||kl|Kalaallisut; Greenlandic|groenlandais
|
||||
kam|||Kamba|kamba
|
||||
kan||kn|Kannada|kannada
|
||||
kar|||Karen languages|karen, langues
|
||||
kas||ks|Kashmiri|kashmiri
|
||||
kau||kr|Kanuri|kanouri
|
||||
kaw|||Kawi|kawi
|
||||
kaz||kk|Kazakh|kazakh
|
||||
kbd|||Kabardian|kabardien
|
||||
kha|||Khasi|khasi
|
||||
khi|||Khoisan languages|khoïsan, langues
|
||||
khm||km|Central Khmer|khmer central
|
||||
kho|||Khotanese; Sakan|khotanais; sakan
|
||||
kik||ki|Kikuyu; Gikuyu|kikuyu
|
||||
kin||rw|Kinyarwanda|rwanda
|
||||
kir||ky|Kirghiz; Kyrgyz|kirghiz
|
||||
kmb|||Kimbundu|kimbundu
|
||||
kok|||Konkani|konkani
|
||||
kom||kv|Komi|kom
|
||||
kon||kg|Kongo|kongo
|
||||
kor||ko|Korean|coréen
|
||||
kos|||Kosraean|kosrae
|
||||
kpe|||Kpelle|kpellé
|
||||
krc|||Karachay-Balkar|karatchai balkar
|
||||
krl|||Karelian|carélien
|
||||
kro|||Kru languages|krou, langues
|
||||
kru|||Kurukh|kurukh
|
||||
kua||kj|Kuanyama; Kwanyama|kuanyama; kwanyama
|
||||
kum|||Kumyk|koumyk
|
||||
kur||ku|Kurdish|kurde
|
||||
kut|||Kutenai|kutenai
|
||||
lad|||Ladino|judéo-espagnol
|
||||
lah|||Lahnda|lahnda
|
||||
lam|||Lamba|lamba
|
||||
lao||lo|Lao|lao
|
||||
lat||la|Latin|latin
|
||||
lav||lv|Latvian|letton
|
||||
lez|||Lezghian|lezghien
|
||||
lim||li|Limburgan; Limburger; Limburgish|limbourgeois
|
||||
lin||ln|Lingala|lingala
|
||||
lit||lt|Lithuanian|lituanien
|
||||
lol|||Mongo|mongo
|
||||
loz|||Lozi|lozi
|
||||
ltz||lb|Luxembourgish; Letzeburgesch|luxembourgeois
|
||||
lua|||Luba-Lulua|luba-lulua
|
||||
lub||lu|Luba-Katanga|luba-katanga
|
||||
lug||lg|Ganda|ganda
|
||||
lui|||Luiseno|luiseno
|
||||
lun|||Lunda|lunda
|
||||
luo|||Luo (Kenya and Tanzania)|luo (Kenya et Tanzanie)
|
||||
lus|||Lushai|lushai
|
||||
mac|mkd|mk|Macedonian|macédonien
|
||||
mad|||Madurese|madourais
|
||||
mag|||Magahi|magahi
|
||||
mah||mh|Marshallese|marshall
|
||||
mai|||Maithili|maithili
|
||||
mak|||Makasar|makassar
|
||||
mal||ml|Malayalam|malayalam
|
||||
man|||Mandingo|mandingue
|
||||
mao|mri|mi|Maori|maori
|
||||
map|||Austronesian languages|austronésiennes, langues
|
||||
mar||mr|Marathi|marathe
|
||||
mas|||Masai|massaï
|
||||
may|msa|ms|Malay|malais
|
||||
mdf|||Moksha|moksa
|
||||
mdr|||Mandar|mandar
|
||||
men|||Mende|mendé
|
||||
mga|||Irish, Middle (900-1200)|irlandais moyen (900-1200)
|
||||
mic|||Mi'kmaq; Micmac|mi'kmaq; micmac
|
||||
min|||Minangkabau|minangkabau
|
||||
mis|||Uncoded languages|langues non codées
|
||||
mkh|||Mon-Khmer languages|môn-khmer, langues
|
||||
mlg||mg|Malagasy|malgache
|
||||
mlt||mt|Maltese|maltais
|
||||
mnc|||Manchu|mandchou
|
||||
mni|||Manipuri|manipuri
|
||||
mno|||Manobo languages|manobo, langues
|
||||
moh|||Mohawk|mohawk
|
||||
mon||mn|Mongolian|mongol
|
||||
mos|||Mossi|moré
|
||||
mul|||Multiple languages|multilingue
|
||||
mun|||Munda languages|mounda, langues
|
||||
mus|||Creek|muskogee
|
||||
mwl|||Mirandese|mirandais
|
||||
mwr|||Marwari|marvari
|
||||
myn|||Mayan languages|maya, langues
|
||||
myv|||Erzya|erza
|
||||
nah|||Nahuatl languages|nahuatl, langues
|
||||
nai|||North American Indian languages|nord-amérindiennes, langues
|
||||
nap|||Neapolitan|napolitain
|
||||
nau||na|Nauru|nauruan
|
||||
nav||nv|Navajo; Navaho|navaho
|
||||
nbl||nr|Ndebele, South; South Ndebele|ndébélé du Sud
|
||||
nde||nd|Ndebele, North; North Ndebele|ndébélé du Nord
|
||||
ndo||ng|Ndonga|ndonga
|
||||
nds|||Low German; Low Saxon; German, Low; Saxon, Low|bas allemand; bas saxon; allemand, bas; saxon, bas
|
||||
nep||ne|Nepali|népalais
|
||||
new|||Nepal Bhasa; Newari|nepal bhasa; newari
|
||||
nia|||Nias|nias
|
||||
nic|||Niger-Kordofanian languages|nigéro-kordofaniennes, langues
|
||||
niu|||Niuean|niué
|
||||
nno||nn|Norwegian Nynorsk; Nynorsk, Norwegian|norvégien nynorsk; nynorsk, norvégien
|
||||
nob||nb|Bokmål, Norwegian; Norwegian Bokmål|norvégien bokmål
|
||||
nog|||Nogai|nogaï; nogay
|
||||
non|||Norse, Old|norrois, vieux
|
||||
nor||no|Norwegian|norvégien
|
||||
nqo|||N'Ko|n'ko
|
||||
nso|||Pedi; Sepedi; Northern Sotho|pedi; sepedi; sotho du Nord
|
||||
nub|||Nubian languages|nubiennes, langues
|
||||
nwc|||Classical Newari; Old Newari; Classical Nepal Bhasa|newari classique
|
||||
nya||ny|Chichewa; Chewa; Nyanja|chichewa; chewa; nyanja
|
||||
nym|||Nyamwezi|nyamwezi
|
||||
nyn|||Nyankole|nyankolé
|
||||
nyo|||Nyoro|nyoro
|
||||
nzi|||Nzima|nzema
|
||||
oci||oc|Occitan (post 1500); Provençal|occitan (après 1500); provençal
|
||||
oji||oj|Ojibwa|ojibwa
|
||||
ori||or|Oriya|oriya
|
||||
orm||om|Oromo|galla
|
||||
osa|||Osage|osage
|
||||
oss||os|Ossetian; Ossetic|ossète
|
||||
ota|||Turkish, Ottoman (1500-1928)|turc ottoman (1500-1928)
|
||||
oto|||Otomian languages|otomi, langues
|
||||
paa|||Papuan languages|papoues, langues
|
||||
pag|||Pangasinan|pangasinan
|
||||
pal|||Pahlavi|pahlavi
|
||||
pam|||Pampanga; Kapampangan|pampangan
|
||||
pan||pa|Panjabi; Punjabi|pendjabi
|
||||
pap|||Papiamento|papiamento
|
||||
pau|||Palauan|palau
|
||||
peo|||Persian, Old (ca.600-400 B.C.)|perse, vieux (ca. 600-400 av. J.-C.)
|
||||
per|fas|fa|Persian|persan
|
||||
phi|||Philippine languages|philippines, langues
|
||||
phn|||Phoenician|phénicien
|
||||
pli||pi|Pali|pali
|
||||
pol||pl|Polish|polonais
|
||||
pon|||Pohnpeian|pohnpei
|
||||
por||pt|Portuguese|portugais
|
||||
pra|||Prakrit languages|prâkrit, langues
|
||||
pro|||Provençal, Old (to 1500)|provençal ancien (jusqu'à 1500)
|
||||
pus||ps|Pushto; Pashto|pachto
|
||||
qaa-qtz|||Reserved for local use|réservée à l'usage local
|
||||
que||qu|Quechua|quechua
|
||||
raj|||Rajasthani|rajasthani
|
||||
rap|||Rapanui|rapanui
|
||||
rar|||Rarotongan; Cook Islands Maori|rarotonga; maori des îles Cook
|
||||
roa|||Romance languages|romanes, langues
|
||||
roh||rm|Romansh|romanche
|
||||
rom|||Romany|tsigane
|
||||
rum|ron|ro|Romanian; Moldavian; Moldovan|roumain; moldave
|
||||
run||rn|Rundi|rundi
|
||||
rup|||Aromanian; Arumanian; Macedo-Romanian|aroumain; macédo-roumain
|
||||
rus||ru|Russian|russe
|
||||
sad|||Sandawe|sandawe
|
||||
sag||sg|Sango|sango
|
||||
sah|||Yakut|iakoute
|
||||
sai|||South American Indian (Other)|indiennes d'Amérique du Sud, autres langues
|
||||
sal|||Salishan languages|salishennes, langues
|
||||
sam|||Samaritan Aramaic|samaritain
|
||||
san||sa|Sanskrit|sanskrit
|
||||
sas|||Sasak|sasak
|
||||
sat|||Santali|santal
|
||||
scn|||Sicilian|sicilien
|
||||
sco|||Scots|écossais
|
||||
sel|||Selkup|selkoupe
|
||||
sem|||Semitic languages|sémitiques, langues
|
||||
sga|||Irish, Old (to 900)|irlandais ancien (jusqu'à 900)
|
||||
sgn|||Sign Languages|langues des signes
|
||||
shn|||Shan|chan
|
||||
sid|||Sidamo|sidamo
|
||||
sin||si|Sinhala; Sinhalese|singhalais
|
||||
sio|||Siouan languages|sioux, langues
|
||||
sit|||Sino-Tibetan languages|sino-tibétaines, langues
|
||||
sla|||Slavic languages|slaves, langues
|
||||
slo|slk|sk|Slovak|slovaque
|
||||
slv||sl|Slovenian|slovène
|
||||
sma|||Southern Sami|sami du Sud
|
||||
sme||se|Northern Sami|sami du Nord
|
||||
smi|||Sami languages|sames, langues
|
||||
smj|||Lule Sami|sami de Lule
|
||||
smn|||Inari Sami|sami d'Inari
|
||||
smo||sm|Samoan|samoan
|
||||
sms|||Skolt Sami|sami skolt
|
||||
sna||sn|Shona|shona
|
||||
snd||sd|Sindhi|sindhi
|
||||
snk|||Soninke|soninké
|
||||
sog|||Sogdian|sogdien
|
||||
som||so|Somali|somali
|
||||
son|||Songhai languages|songhai, langues
|
||||
sot||st|Sotho, Southern|sotho du Sud
|
||||
spa||es|Spanish; Castilian|espagnol; castillan
|
||||
srd||sc|Sardinian|sarde
|
||||
srn|||Sranan Tongo|sranan tongo
|
||||
srp||sr|Serbian|serbe
|
||||
srr|||Serer|sérère
|
||||
ssa|||Nilo-Saharan languages|nilo-sahariennes, langues
|
||||
ssw||ss|Swati|swati
|
||||
suk|||Sukuma|sukuma
|
||||
sun||su|Sundanese|soundanais
|
||||
sus|||Susu|soussou
|
||||
sux|||Sumerian|sumérien
|
||||
swa||sw|Swahili|swahili
|
||||
swe||sv|Swedish|suédois
|
||||
syc|||Classical Syriac|syriaque classique
|
||||
syr|||Syriac|syriaque
|
||||
tah||ty|Tahitian|tahitien
|
||||
tai|||Tai languages|tai, langues
|
||||
tam||ta|Tamil|tamoul
|
||||
tat||tt|Tatar|tatar
|
||||
tel||te|Telugu|télougou
|
||||
tem|||Timne|temne
|
||||
ter|||Tereno|tereno
|
||||
tet|||Tetum|tetum
|
||||
tgk||tg|Tajik|tadjik
|
||||
tgl||tl|Tagalog|tagalog
|
||||
tha||th|Thai|thaï
|
||||
tib|bod|bo|Tibetan|tibétain
|
||||
tig|||Tigre|tigré
|
||||
tir||ti|Tigrinya|tigrigna
|
||||
tiv|||Tiv|tiv
|
||||
tkl|||Tokelau|tokelau
|
||||
tlh|||Klingon; tlhIngan-Hol|klingon
|
||||
tli|||Tlingit|tlingit
|
||||
tmh|||Tamashek|tamacheq
|
||||
tog|||Tonga (Nyasa)|tonga (Nyasa)
|
||||
ton||to|Tonga (Tonga Islands)|tongan (Îles Tonga)
|
||||
tpi|||Tok Pisin|tok pisin
|
||||
tsi|||Tsimshian|tsimshian
|
||||
tsn||tn|Tswana|tswana
|
||||
tso||ts|Tsonga|tsonga
|
||||
tuk||tk|Turkmen|turkmène
|
||||
tum|||Tumbuka|tumbuka
|
||||
tup|||Tupi languages|tupi, langues
|
||||
tur||tr|Turkish|turc
|
||||
tut|||Altaic languages|altaïques, langues
|
||||
tvl|||Tuvalu|tuvalu
|
||||
twi||tw|Twi|twi
|
||||
tyv|||Tuvinian|touva
|
||||
udm|||Udmurt|oudmourte
|
||||
uga|||Ugaritic|ougaritique
|
||||
uig||ug|Uighur; Uyghur|ouïgour
|
||||
ukr||uk|Ukrainian|ukrainien
|
||||
umb|||Umbundu|umbundu
|
||||
und|||Undetermined|indéterminée
|
||||
urd||ur|Urdu|ourdou
|
||||
uzb||uz|Uzbek|ouszbek
|
||||
vai|||Vai|vaï
|
||||
ven||ve|Venda|venda
|
||||
vie||vi|Vietnamese|vietnamien
|
||||
vol||vo|Volapük|volapük
|
||||
vot|||Votic|vote
|
||||
wak|||Wakashan languages|wakashanes, langues
|
||||
wal|||Walamo|walamo
|
||||
war|||Waray|waray
|
||||
was|||Washo|washo
|
||||
wel|cym|cy|Welsh|gallois
|
||||
wen|||Sorbian languages|sorabes, langues
|
||||
wln||wa|Walloon|wallon
|
||||
wol||wo|Wolof|wolof
|
||||
xal|||Kalmyk; Oirat|kalmouk; oïrat
|
||||
xho||xh|Xhosa|xhosa
|
||||
yao|||Yao|yao
|
||||
yap|||Yapese|yapois
|
||||
yid||yi|Yiddish|yiddish
|
||||
yor||yo|Yoruba|yoruba
|
||||
ypk|||Yupik languages|yupik, langues
|
||||
zap|||Zapotec|zapotèque
|
||||
zbl|||Blissymbols; Blissymbolics; Bliss|symboles Bliss; Bliss
|
||||
zen|||Zenaga|zenaga
|
||||
zha||za|Zhuang; Chuang|zhuang; chuang
|
||||
znd|||Zande languages|zandé, langues
|
||||
zul||zu|Zulu|zoulou
|
||||
zun|||Zuni|zuni
|
||||
zxx|||No linguistic content; Not applicable|pas de contenu linguistique; non applicable
|
||||
zza|||Zaza; Dimili; Dimli; Kirdki; Kirmanjki; Zazaki|zaza; dimili; dimli; kirdki; kirmanjki; zazaki
|
232
lib/guessit/__init__.py
Normal file
232
lib/guessit/__init__.py
Normal file
|
@ -0,0 +1,232 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
import pkg_resources
|
||||
from .__version__ import __version__
|
||||
|
||||
__all__ = ['Guess', 'Language',
|
||||
'guess_file_info', 'guess_video_info',
|
||||
'guess_movie_info', 'guess_episode_info']
|
||||
|
||||
|
||||
# Do python3 detection before importing any other module, to be sure that
# it will then always be available
# with code from http://lucumr.pocoo.org/2011/1/22/forwards-compatible-python/
import sys

if sys.version_info[0] >= 3:  # pragma: no cover
    PY2, PY3 = False, True
    unicode_text_type = str
    native_text_type = str
    base_text_type = str

    def u(x):
        """Convert *x* to text.

        Bytes are decoded as UTF-8 and lists are converted element by
        element, matching what the Python 2 branch below does; anything
        else goes through ``str()``.
        """
        # str(b'abc') would produce the repr "b'abc'", not the text itself.
        if isinstance(x, bytes):
            return x.decode('utf-8')
        if isinstance(x, list):
            return [u(item) for item in x]
        return str(x)

    def s(x):
        """Return *x* unchanged: native strings are already text here."""
        return x

    class UnicodeMixin(object):
        """Mixin whose ``__str__`` delegates to ``__unicode__``."""

        def __str__(self):
            return self.__unicode__()

    import binascii

    def to_hex(x):
        """Return the hexadecimal representation of the bytes *x* as text."""
        return binascii.hexlify(x).decode('utf-8')

else:  # pragma: no cover
    PY2, PY3 = True, False
    __all__ = list(map(str, __all__))  # fix imports for python2
    unicode_text_type = unicode
    native_text_type = str
    base_text_type = basestring

    def u(x):
        """Convert *x* to unicode.

        Byte strings are decoded as UTF-8 and lists are converted element
        by element; anything else goes through ``unicode()``.
        """
        if isinstance(x, str):
            return x.decode('utf-8')
        if isinstance(x, list):
            # Do not name the loop variable ``s``: it would shadow the
            # helper function of the same name.
            return [u(item) for item in x]
        return unicode(x)

    def s(x):
        """Convert *x* to native (byte) strings.

        Unicode is encoded as UTF-8; lists, tuples and dicts are converted
        recursively; any other value is returned unchanged.
        """
        if isinstance(x, unicode):
            return x.encode('utf-8')
        if isinstance(x, list):
            return [s(y) for y in x]
        if isinstance(x, tuple):
            return tuple(s(y) for y in x)
        if isinstance(x, dict):
            return dict((s(key), s(value)) for key, value in x.items())
        return x

    class UnicodeMixin(object):
        """Mixin whose ``__str__`` is the UTF-8 encoding of ``unicode(self)``."""

        def __str__(self):
            return unicode(self).encode('utf-8')

    def to_hex(x):
        """Return the hexadecimal representation of the byte string *x*."""
        return x.encode('hex')

    range = xrange
|
||||
|
||||
from guessit.guess import Guess, merge_all
|
||||
from guessit.language import Language
|
||||
from guessit.matcher import IterativeMatcher
|
||||
from guessit.textutils import clean_string, is_camel, from_camel
|
||||
import os.path
|
||||
import logging
|
||||
import json
|
||||
|
||||
log = logging.getLogger(__name__)


class NullHandler(logging.Handler):
    """Logging handler that drops every record it receives."""

    def emit(self, record):
        """Discard *record* without producing any output."""


# Behave like a polite library: give this module's logger a handler that
# emits nothing.
h = NullHandler()
log.addHandler(h)
|
||||
|
||||
|
||||
def _guess_filename(filename, options=None, **kwargs):
    """Return the properties matched for *filename*.

    Builds the match tree, lets the camel-case pass add further properties
    to it, and returns whatever the tree reports as matched.
    """
    tree = _build_filename_mtree(filename, options=options, **kwargs)
    _add_camel_properties(tree, options=options)
    matched = tree.matched()
    return matched
|
||||
|
||||
|
||||
def _build_filename_mtree(filename, options=None, **kwargs):
    """Run the iterative matcher on *filename*, with an optional second pass.

    If the first matcher exposes non-empty ``second_pass_options``, the
    matcher is run again with those options layered on top of *options*,
    and the second tree is returned instead of the first.

    :param filename: the name to analyse
    :param options: mapping of matcher options, or None
    :param kwargs: forwarded unchanged to ``IterativeMatcher``
    :return: the ``IterativeMatcher`` of the last pass that ran
    """
    mtree = IterativeMatcher(filename, options=options, **kwargs)
    second_pass_options = mtree.second_pass_options
    if second_pass_options:
        log.info("Running 2nd pass")
        # ``options`` defaults to None and dict(None) raises TypeError, so
        # fall back to an empty mapping before merging.
        merged_options = dict(options or {})
        merged_options.update(second_pass_options)
        mtree = IterativeMatcher(filename, options=merged_options, **kwargs)
    return mtree
|
||||
|
||||
|
||||
def _add_camel_properties(mtree, options=None, **kwargs):
    """Try to extract extra properties from camel-cased parts of *mtree*.

    The matched 'series' (for episodes) or 'title' (for anything else) is
    examined first, then the value of every unidentified leaf of the tree.
    """
    if mtree.matched().get('type') == 'episode':
        prop = 'series'
    else:
        prop = 'title'
    _guess_camel_string(mtree, mtree.matched().get(prop),
                        options=options, skip_title=False, **kwargs)

    for leaf in mtree.match_tree.unidentified_leaves():
        _guess_camel_string(mtree, leaf.value,
                            options=options, skip_title=True, **kwargs)
|
||||
|
||||
|
||||
def _guess_camel_string(mtree, string, options=None, skip_title=False, **kwargs):
|
||||
if string and is_camel(string):
|
||||
log.info('"%s" is camel cased. Try to detect more properties.' % (string,))
|
||||
uncameled_value = from_camel(string)
|
||||
camel_tree = _build_filename_mtree(uncameled_value, options=options, name_only=True, skip_title=skip_title, **kwargs)
|
||||
if len(camel_tree.matched()) > 0:
|
||||
# Title has changed.
|
||||
mtree.matched().update(camel_tree.matched())
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def guess_file_info(filename, info=None, options=None, **kwargs):
    """info can contain the names of the various plugins, such as 'filename' to
    detect filename info, or 'hash_md5' to get the md5 hash of the file.

    >>> testfile = os.path.join(os.path.dirname(__file__), 'test/dummy.srt')
    >>> g = guess_file_info(testfile, info = ['hash_md5', 'hash_sha1'])
    >>> g['hash_md5'], g['hash_sha1']
    ('64de6b5893cac24456c46a935ef9c359', 'a703fc0fa4518080505809bf562c6fc6f7b3c98c')

    :param filename: path or name of the file; converted to text with ``u()``
    :param info: a single infotype or a list of them. Recognised values are
        'filename', 'hash_mpc', 'hash_ed2k' and 'hash_<name>' where <name>
        is an algorithm known to ``hashlib``. Defaults to 'filename'.
        Anything else is logged as invalid and skipped.
    :param options: mapping of options handed to the filename matcher;
        defaults to an empty dict
    :param kwargs: forwarded to the filename matcher
    :return: the result of ``merge_all`` over every guess that was produced
    """
    info = info or 'filename'
    options = options or {}

    result = []
    hashers = []  # ordered (infotype, hasher) pairs, one per distinct infotype

    # Force unicode as soon as possible
    filename = u(filename)

    if isinstance(info, base_text_type):
        info = [info]

    for infotype in info:
        if infotype == 'filename':
            result.append(_guess_filename(filename, options, **kwargs))

        elif infotype == 'hash_mpc':
            from guessit.hash_mpc import hash_file
            try:
                result.append(Guess({infotype: hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute MPC-style hash because: %s', e)

        elif infotype == 'hash_ed2k':
            from guessit.hash_ed2k import hash_file
            try:
                result.append(Guess({infotype: hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute ed2k hash because: %s', e)

        elif infotype.startswith('hash_'):
            import hashlib
            if any(name == infotype for name, _ in hashers):
                # Already scheduled: a second hasher for the same infotype
                # would report a digest for data it was never fed.
                continue
            hashname = infotype[5:]
            try:
                # hashlib.new() rejects unknown names with ValueError, whereas
                # getattr(hashlib, name)() could pick up non-hash attributes
                # (e.g. 'new') and fail with an uncaught TypeError.
                hasher = hashlib.new(hashname)
                hashers.append((infotype, hasher))
            except (ValueError, TypeError):
                log.warning('Could not compute %s hash because it is not available from python\'s hashlib module', hashname)

        else:
            log.warning('Invalid infotype: %s', infotype)

    # do all the hashes now, but on a single pass
    if hashers:
        try:
            blocksize = 8192

            with open(filename, 'rb') as f:
                for chunk in iter(lambda: f.read(blocksize), b''):
                    for _, hasher in hashers:
                        hasher.update(chunk)

            for infotype, hasher in hashers:
                result.append(Guess({infotype: hasher.hexdigest()},
                                    confidence=1.0))
        except Exception as e:
            # Best effort: an unreadable file must not abort the other guesses.
            log.warning('Could not compute hash because: %s', e)

    result = merge_all(result)

    return result
|
||||
|
||||
|
||||
def guess_video_info(filename, info=None, options=None, **kwargs):
    """Call ``guess_file_info`` for *filename* with ``type='video'``.

    Every other argument is forwarded unchanged.
    """
    guess = guess_file_info(filename, info=info, options=options,
                            type='video', **kwargs)
    return guess
|
||||
|
||||
|
||||
def guess_movie_info(filename, info=None, options=None, **kwargs):
    """Call ``guess_file_info`` for *filename* with ``type='movie'``.

    Every other argument is forwarded unchanged.
    """
    guess = guess_file_info(filename, info=info, options=options,
                            type='movie', **kwargs)
    return guess
|
||||
|
||||
|
||||
def guess_episode_info(filename, info=None, options=None, **kwargs):
    """Call ``guess_file_info`` for *filename* with ``type='episode'``.

    Every other argument is forwarded unchanged.
    """
    guess = guess_file_info(filename, info=info, options=options,
                            type='episode', **kwargs)
    return guess
|
217
lib/guessit/__main__.py
Normal file
217
lib/guessit/__main__.py
Normal file
|
@ -0,0 +1,217 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
import logging
|
||||
import os
|
||||
|
||||
from guessit import PY2, u, guess_file_info
|
||||
from guessit.options import option_parser
|
||||
|
||||
|
||||
def guess_file(filename, info='filename', options=None, **kwargs):
    """Guess information about *filename* and print it.

    When ``options['yaml']`` is set and PyYAML can be imported, the guess is
    printed as a YAML mapping keyed by the filename; otherwise the guess's
    ``nice_string()`` is printed.
    """
    options = options or {}
    filename = u(filename)

    print('For:', filename)
    guess = guess_file_info(filename, info, options, **kwargs)
    if options.get('yaml'):
        try:
            import yaml
            # Unwrap single-element lists before dumping.
            for k, v in guess.items():
                if isinstance(v, list) and len(v) == 1:
                    guess[k] = v[0]
            ystr = yaml.safe_dump({filename: dict(guess)}, default_flow_style=False)
            for index, yline in enumerate(ystr.splitlines()):
                # The first two lines are rewritten with '? ' and ':' markers;
                # the remaining lines are printed as dumped.
                if index == 0:
                    yline = "? " + yline[:-1]
                elif index == 1:
                    yline = ":" + yline[1:]
                print(yline)
            return
        except ImportError:  # pragma: no cover
            print('PyYAML not found. Using default output.')
    print('GuessIt found:', guess.nice_string(options.get('advanced')))
|
||||
|
||||
|
||||
def _supported_properties():
    """Collect the properties supported by every transformer.

    Returns a 2-tuple ``(all_properties, transformers_properties)``:
    ``all_properties`` maps each property name to the list of possible
    values gathered so far (empty when none were given), and
    ``transformers_properties`` is a list of
    ``(transformer, supported_properties)`` pairs.
    """
    from guessit.plugins import transformers

    all_properties = {}
    transformers_properties = []
    for transformer in transformers.all_transformers():
        supported = transformer.supported_properties()
        transformers_properties.append((transformer, supported))

        if isinstance(supported, dict):
            # Mapping form: property name -> iterable of possible values.
            for name, possible_values in supported.items():
                known_values = all_properties.setdefault(name, [])
                if possible_values:
                    known_values.extend(possible_values)
        else:
            # Plain iterable of names: only register each property.
            for name in supported:
                all_properties.setdefault(name, [])

    return (all_properties, transformers_properties)
|
||||
|
||||
|
||||
def display_transformers():
    """Print the name and priority of each transformer returned by
    ``_supported_properties()``."""
    print('GuessIt transformers:')
    _, pairs = _supported_properties()
    for pair in pairs:
        transformer, _ = pair
        print('[@] %s (%s)' % (transformer.name, transformer.priority))
|
||||
|
||||
|
||||
def display_properties(values, transformers):
    """Print the supported properties.

    :param values: when true, also print the possible values of each
        property that has any
    :param transformers: when true, group the properties by transformer;
        otherwise print all property names in sorted order
    """
    print('GuessIt properties:')
    all_properties, transformers_properties = _supported_properties()

    def show(property_names):
        # Print one line per property, plus its values when requested.
        for property_name in property_names:
            property_values = all_properties.get(property_name)
            print(' [+] %s' % (property_name,))
            if property_values and values:
                _display_property_values(property_name, indent=4)

    if not transformers:
        show(sorted(all_properties.keys()))
        return

    for transformer, properties_list in transformers_properties:
        print('[@] %s (%s)' % (transformer.name, transformer.priority))
        show(properties_list)
|
||||
|
||||
|
||||
def _display_property_values(property_name, indent=2):
    """Print each possible value of *property_name*, one per line, prefixed
    by *indent* spaces."""
    all_properties = _supported_properties()[0]
    for property_value in all_properties.get(property_name):
        padding = indent * ' '
        bullet = '[!] %s' % (property_value,)
        print(padding + bullet)
|
||||
|
||||
|
||||
def run_demo(episodes=True, movies=True, options=None):
    """Run ``guess_file`` on a built-in set of sample file names.

    :param episodes: if true, the sample episode file names are guessed
    :param movies: if true, the sample movie file names are guessed
    :param options: options forwarded unchanged to every ``guess_file`` call
    """
    # NOTE: tests should not be added here but rather in the tests/ folder
    #       this is just intended as a quick example

    def _guess_all(filenames, filetype):
        # Print a ruler before each sample, then let guess_file handle it.
        for filename in filenames:
            print('-' * 80)
            guess_file(filename, options=options, type=filetype)

    if episodes:
        _guess_all(
            ['Series/Californication/Season 2/Californication.2x05.Vaginatown.HDTV.XviD-0TV.[tvu.org.ru].avi',
             'Series/dexter/Dexter.5x02.Hello,.Bandit.ENG.-.sub.FR.HDTV.XviD-AlFleNi-TeaM.[tvu.org.ru].avi',
             'Series/Treme/Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.[tvu.org.ru].avi',
             'Series/Duckman/Duckman - 101 (01) - 20021107 - I, Duckman.avi',
             'Series/Duckman/Duckman - S1E13 Joking The Chicken (unedited).avi',
             'Series/Simpsons/The_simpsons_s13e18_-_i_am_furious_yellow.mpg',
             'Series/Simpsons/Saison 12 Français/Simpsons,.The.12x08.A.Bas.Le.Sergent.Skinner.FR.[tvu.org.ru].avi',
             'Series/Dr._Slump_-_002_DVB-Rip_Catalan_by_kelf.avi',
             'Series/Kaamelott/Kaamelott - Livre V - Second Volet - HD 704x396 Xvid 2 pass - Son 5.1 - TntRip by Slurm.avi'
             ],
            'episode')

    if movies:
        _guess_all(
            ['Movies/Fear and Loathing in Las Vegas (1998)/Fear.and.Loathing.in.Las.Vegas.720p.HDDVD.DTS.x264-ESiR.mkv',
             'Movies/El Dia de la Bestia (1995)/El.dia.de.la.bestia.DVDrip.Spanish.DivX.by.Artik[SEDG].avi',
             'Movies/Blade Runner (1982)/Blade.Runner.(1982).(Director\'s.Cut).CD1.DVDRip.XviD.AC3-WAF.avi',
             'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv',
             'Movies/Sin City (BluRay) (2005)/Sin.City.2005.BDRip.720p.x264.AC3-SEPTiC.mkv',
             'Movies/Borat (2006)/Borat.(2006).R5.PROPER.REPACK.DVDRip.XviD-PUKKA.avi',  # FIXME: PROPER and R5 get overwritten
             '[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv',  # FIXME: title gets overwritten
             'Battle Royale (2000)/Battle.Royale.(Batoru.Rowaiaru).(2000).(Special.Edition).CD1of2.DVDRiP.XviD-[ZeaL].avi',
             'Movies/Brazil (1985)/Brazil_Criterion_Edition_(1985).CD2.English.srt',
             'Movies/Persepolis (2007)/[XCT] Persepolis [H264+Aac-128(Fr-Eng)+ST(Fr-Eng)+Ind].mkv',
             'Movies/Toy Story (1995)/Toy Story [HDTV 720p English-Spanish].mkv',
             'Movies/Pirates of the Caribbean: The Curse of the Black Pearl (2003)/Pirates.Of.The.Carribean.DC.2003.iNT.DVDRip.XviD.AC3-NDRT.CD1.avi',
             'Movies/Office Space (1999)/Office.Space.[Dual-DVDRip].[Spanish-English].[XviD-AC3-AC3].[by.Oswald].avi',
             'Movies/The NeverEnding Story (1984)/The.NeverEnding.Story.1.1984.DVDRip.AC3.Xvid-Monteque.avi',
             'Movies/Juno (2007)/Juno KLAXXON.avi',
             'Movies/Chat noir, chat blanc (1998)/Chat noir, Chat blanc - Emir Kusturica (VO - VF - sub FR - Chapters).mkv',
             'Movies/Wild Zero (2000)/Wild.Zero.DVDivX-EPiC.srt',
             'Movies/El Bosque Animado (1987)/El.Bosque.Animado.[Jose.Luis.Cuerda.1987].[Xvid-Dvdrip-720x432].avi',
             'testsmewt_bugs/movies/Baraka_Edition_Collector.avi'
             ],
            'movie')
|
||||
|
||||
|
||||
def main(args=None, setup_logging=True):
    """Command-line entry point.

    :param args: argument list to parse; when empty or None the option parser
        falls back to its own default (the process arguments)
    :param setup_logging: if true, ``guessit.slogging.setupLogging()`` is
        called before anything else
    """
    if setup_logging:
        from guessit import slogging
        slogging.setupLogging()

    if PY2:  # pragma: no cover
        import codecs
        import locale
        import sys

        # see http://bugs.python.org/issue2128
        if os.name == 'nt':
            for index, raw_argument in enumerate(sys.argv):
                sys.argv[index] = raw_argument.decode(locale.getpreferredencoding())

        # see https://github.com/wackou/guessit/issues/43
        # and http://stackoverflow.com/questions/4545661/unicodedecodeerror-when-redirecting-to-file
        # Wrap sys.stdout into a StreamWriter to allow writing unicode.
        sys.stdout = codecs.getwriter(locale.getpreferredencoding())(sys.stdout)

    if args:
        options, args = option_parser.parse_args(args)
    else:  # pragma: no cover
        options, args = option_parser.parse_args()
    if options.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Becomes True as soon as one of the requested actions has been run.
    handled = False

    if options.properties or options.values:
        display_properties(options.values, options.transformers)
        handled = True
    elif options.transformers:
        display_transformers()
        handled = True

    if options.demo:
        run_demo(episodes=True, movies=True, options=vars(options))
        handled = True
    elif args:
        handled = True
        for filename in args:
            guess_file(filename,
                       info=options.info.split(','),
                       options=vars(options))

    if not handled:  # pragma: no cover
        option_parser.print_help()
|
||||
|
||||
# Run the command-line interface only when this module is executed as a
# script, not when it is imported.
if __name__ == '__main__':
    main()
|
20
lib/guessit/__version__.py
Normal file
20
lib/guessit/__version__.py
Normal file
|
@ -0,0 +1,20 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
# Version string of this GuessIt package.
__version__ = '0.7.1'
|
615
lib/guessit/containers.py
Normal file
615
lib/guessit/containers.py
Normal file
|
@ -0,0 +1,615 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from .patterns import compile_pattern, sep
|
||||
from . import base_text_type
|
||||
from .guess import Guess
|
||||
import types
|
||||
|
||||
|
||||
def _get_span(prop, match):
|
||||
"""Retrieves span for a match"""
|
||||
if not prop.global_span and match.re.groups:
|
||||
start = None
|
||||
end = None
|
||||
for i in range(1, match.re.groups + 1):
|
||||
span = match.span(i)
|
||||
if start is None or span[0] < start:
|
||||
start = span[0]
|
||||
if end is None or span[1] > end:
|
||||
end = span[1]
|
||||
return (start, end)
|
||||
else:
|
||||
return match.span()
|
||||
start = span[0]
|
||||
end = span[1]
|
||||
|
||||
|
||||
def _get_groups(compiled_re):
|
||||
"""
|
||||
Retrieves groups from re
|
||||
|
||||
:return: list of group names
|
||||
"""
|
||||
if compiled_re.groups:
|
||||
indexgroup = {}
|
||||
for k, i in compiled_re.groupindex.items():
|
||||
indexgroup[i] = k
|
||||
ret = []
|
||||
for i in range(1, compiled_re.groups + 1):
|
||||
ret.append(indexgroup.get(i, i))
|
||||
return ret
|
||||
else:
|
||||
return [None]
|
||||
|
||||
|
||||
class NoValidator(object):
    """Validator that accepts every match."""

    def validate(self, prop, string, node, match, entry_start, entry_end):
        """Always return True; none of the arguments is inspected."""
        return True
|
||||
|
||||
|
||||
class DefaultValidator(object):
    """Make sure our match is surrounded by separators, or by another entry"""

    def validate(self, prop, string, node, match, entry_start, entry_end):
        """Tell whether the match is properly delimited on both sides.

        :param prop: property being validated (used to compute the span)
        :param string: string the match was found in
        :param node: current node of the matching tree (unused here)
        :param match: regular expression match object
        :param entry_start: mapping whose keys are the start offsets of the
            other entries found in ``string``
        :param entry_end: mapping whose keys are the end offsets of the other
            entries found in ``string``

        :return: True when each side of the span touches the string boundary,
            a separator character, or another entry
        :rtype: bool
        """
        first, last = _get_span(prop, match)

        # Left edge: string boundary / separator, or another entry ends here.
        left_is_sep = first <= 0 or string[first - 1] in sep
        left_is_entry = first in entry_end

        # Right edge: string boundary / separator, or another entry starts here.
        right_is_sep = last >= len(string) or string[last] in sep
        right_is_entry = last in entry_start

        return (left_is_sep or left_is_entry) and (right_is_sep or right_is_entry)
|
||||
|
||||
|
||||
class WeakValidator(DefaultValidator):
    """Make sure our match is surrounded by separators and is the first or last element in the string"""

    def validate(self, prop, string, node, match, entry_start, entry_end):
        """Accept the match only at the very beginning or end of ``string``.

        The match must first pass :class:`DefaultValidator`. It is then
        accepted when every character before the whole-match span is a
        separator, or when every character from the second one after the
        span onwards is a separator.

        :rtype: bool
        """
        if not super(WeakValidator, self).validate(prop, string, node, match, entry_start, entry_end):
            return False

        first, last = match.span()

        # Nothing but separators in front of the match: first element.
        if all(char in sep for char in string[:first]):
            return True

        # The character right after the match (string[last]) is not
        # re-checked here; only what follows it must be separators.
        return all(char in sep for char in string[last + 1:])
|
||||
|
||||
|
||||
class LeavesValidator(DefaultValidator):
    """Make sure our match is surrounded by separators and validates defined lambdas"""

    def __init__(self, lambdas=None, previous_lambdas=None, next_lambdas=None, both_side=False, default_=True):
        """
        :param lambdas: rules applied to both the previous and the next leaves
        :type lambdas: list of callables taking a leaf
        :param previous_lambdas: rules applied to the previous leaves only
        :type previous_lambdas: list of callables taking a leaf
        :param next_lambdas: rules applied to the next leaves only
        :type next_lambdas: list of callables taking a leaf
        :param both_side: if True, previous and next results are combined
            with ``and``; otherwise with ``or``
        :type both_side: boolean
        :param default_: if True, :class:`DefaultValidator` must accept the
            match before any rule is evaluated
        :type default_: boolean
        """
        # Work on copies: the lists are extended below and the caller's
        # arguments must not be modified (nor shared between both sides).
        self.previous_lambdas = list(previous_lambdas) if previous_lambdas is not None else []
        self.next_lambdas = list(next_lambdas) if next_lambdas is not None else []
        if lambdas:
            self.previous_lambdas.extend(lambdas)
            self.next_lambdas.extend(lambdas)
        self.both_side = both_side
        self.default_ = default_

    def validate(self, prop, string, node, match, entry_start, entry_end):
        """Validate the match against the default rule and the leaf rules.

        :return: False when the default validation is enabled and fails;
            the default result when no rule is defined on either side;
            otherwise the combination of the previous and next rule results
        """
        if self.default_:
            super_ret = super(LeavesValidator, self).validate(prop, string, node, match, entry_start, entry_end)
        else:
            super_ret = True
        if not super_ret:
            return False

        previous_ = self._validate_previous(prop, string, node, match, entry_start, entry_end)
        # NOTE(review): with both_side=True this returns before the next
        # leaves are examined; ``not self.both_side`` may have been intended.
        # Left unchanged pending confirmation against callers.
        if previous_ and self.both_side:
            return previous_
        next_ = self._validate_next(prop, string, node, match, entry_start, entry_end)

        # No rule defined on either side: only the default validation counts.
        if previous_ is None and next_ is None:
            return super_ret

        if self.both_side:
            return previous_ and next_
        else:
            return previous_ or next_

    def _validate_previous(self, prop, string, node, match, entry_start, entry_end):
        """Return the first non-None rule result over the previous leaves.

        Returns False when rules exist but none gives a result, and None
        when no rule is defined for the previous leaves.
        """
        if self.previous_lambdas:
            for leaf in node.root.previous_leaves(node):
                for lambda_ in self.previous_lambdas:
                    ret = self._check_rule(lambda_, leaf)
                    if ret is not None:
                        return ret
            return False

    def _validate_next(self, prop, string, node, match, entry_start, entry_end):
        """Return the first non-None rule result over the next leaves.

        Returns False when rules exist but none gives a result, and None
        when no rule is defined for the next leaves.
        """
        if self.next_lambdas:
            for leaf in node.root.next_leaves(node):
                for lambda_ in self.next_lambdas:
                    ret = self._check_rule(lambda_, leaf)
                    if ret is not None:
                        return ret
            return False

    def _check_rule(self, lambda_, previous_leaf):
        """Apply a single rule to a leaf and return its result."""
        return lambda_(previous_leaf)
|
||||
|
||||
|
||||
class _Property(object):
    """Represents a property configuration."""

    def __init__(self, keys=None, pattern=None, canonical_form=None, canonical_from_pattern=True, confidence=1.0, enhance=True, global_span=False, validator=DefaultValidator(), formatter=None):
        """
        :param keys: Keys of the property (format, screenSize, ...)
        :type keys: string
        :param canonical_form: Unique value of the property (DVD, 720p, ...)
        :type canonical_form: string
        :param pattern: Regexp pattern
        :type pattern: string
        :param canonical_from_pattern: if True and no canonical form is given,
            the pattern is used as canonical form
        :type canonical_from_pattern: boolean
        :param confidence: confidence
        :type confidence: float
        :param enhance: enhance the pattern
        :type enhance: boolean
        :param global_span: if True, the whole match span will used to create the Guess.
                            Else, the span from the capturing groups will be used.
        :type global_span: boolean
        :param validator: Validator to use
        :type validator: :class:`DefaultValidator`
        :param formatter: Formatter to use
        :type formatter: function

        :raise ValueError: if neither ``keys`` nor a named group of the
            pattern provides a property key
        """
        if isinstance(keys, list):
            # Copy: named groups are appended below and the caller's list
            # must not be modified.
            self.keys = list(keys)
        elif isinstance(keys, base_text_type):
            self.keys = [keys]
        else:
            self.keys = []
        self.canonical_form = canonical_form
        if pattern is not None:
            self.pattern = pattern
        else:
            self.pattern = canonical_form
        if self.canonical_form is None and canonical_from_pattern:
            self.canonical_form = self.pattern
        self.compiled = compile_pattern(self.pattern, enhance=enhance)
        # Every named group of the pattern is also a key of the property.
        for group_name in _get_groups(self.compiled):
            if isinstance(group_name, base_text_type) and group_name not in self.keys:
                self.keys.append(group_name)
        if not self.keys:
            raise ValueError("No property key is defined")
        self.confidence = confidence
        self.global_span = global_span
        self.validator = validator
        self.formatter = formatter

    def format(self, value, group_name=None):
        """Retrieves the final value from re group match value

        When ``self.formatter`` is a dict, the formatter registered for
        ``group_name`` is used, falling back to the one registered under
        ``None``. A plain function is called with the value; any other
        non-None formatter has its ``format`` method called with it.
        """
        if isinstance(self.formatter, dict):
            formatter = self.formatter.get(group_name)
            if formatter is None and group_name is not None:
                formatter = self.formatter.get(None)
        else:
            formatter = self.formatter
        if isinstance(formatter, types.FunctionType):
            return formatter(value)
        elif formatter is not None:
            return formatter.format(value)
        return value

    def __repr__(self):
        return "%s: %s" % (self.keys, self.canonical_form if self.canonical_form else self.pattern)
|
||||
|
||||
|
||||
class PropertiesContainer(object):
    """Registry of :class:`_Property` objects that can be searched for in strings."""

    def __init__(self, **kwargs):
        """
        :param kwargs: default keyword arguments applied to every property
            created through :meth:`register_property`
        """
        self._properties = []
        self.default_property_kwargs = kwargs

    def unregister_property(self, name, *canonical_forms):
        """Unregister a property canonical forms

        If canonical_forms are specified, only those values will be unregistered

        :param name: Property name to unregister
        :type name: string
        :param canonical_forms: Values to unregister
        :type canonical_forms: varargs of string
        """
        def _is_target(prop):
            return name in prop.keys and (not canonical_forms or prop.canonical_form in canonical_forms)

        # Update the list in place so the removal is actually recorded.
        self._properties[:] = [prop for prop in self._properties if not _is_target(prop)]

    def register_property(self, name, *patterns, **property_params):
        """Register property with defined canonical form and patterns.

        :param name: name of the property (format, screenSize, ...)
        :type name: string
        :param patterns: regular expression patterns to register for the property canonical_form
        :type patterns: varargs of string

        :return: the properties created, one per pattern
        :rtype: list of :class:`_Property`
        """
        properties = []
        for pattern in patterns:
            params = dict(self.default_property_kwargs)
            params.update(property_params)
            if isinstance(pattern, dict):
                # A dict pattern carries its own keyword arguments.
                params.update(pattern)
                prop = _Property(name, **params)
            else:
                prop = _Property(name, pattern, **params)
            self._properties.append(prop)
            properties.append(prop)
        return properties

    def register_canonical_properties(self, name, *canonical_forms, **property_params):
        """Register properties from their canonical forms.

        :param name: name of the property (releaseGroup, ...)
        :type name: string
        :param canonical_forms: values of the property ('ESiR', 'WAF', 'SEPTiC', ...)
        :type canonical_forms: varargs of strings

        :return: the properties created, one per canonical form
        :rtype: list of :class:`_Property`
        """
        properties = []
        for canonical_form in canonical_forms:
            params = dict(property_params)
            params['canonical_form'] = canonical_form
            # Forward the completed params (including the canonical form).
            properties.extend(self.register_property(name, canonical_form, **params))
        return properties

    def unregister_all_properties(self):
        """Unregister all defined properties"""
        # list.clear() is not available on Python 2.
        del self._properties[:]

    def find_properties(self, string, node, name=None, validate=True, re_match=False, sort=True, multiple=False):
        """Find all distinct properties for given string

        If no capturing group is defined in the property, value will be grabbed from the entire match.

        If one or more unnamed capturing groups are defined in the property, first capturing group will be used.

        If named capturing groups are defined in the property, they will be returned as property key.

        If validate, found properties will be validated by their defined validator

        If re_match, re.match will be used instead of re.search.

        if sort, found properties will be sorted from longer match to shorter match.

        If multiple is False and multiple values are found for the same property, the more confident one will be returned.

        If multiple is False and multiple values are found for the same property and the same confidence, the longer will be returned.

        :param string: input string
        :type string: string

        :param node: current node of the matching tree
        :type node: :class:`guessit.matchtree.MatchTree`

        :param name: name of property to find
        :type name: string

        :param re_match: use re.match instead of re.search
        :type re_match: bool

        :param multiple: Allows multiple property values to be returned
        :type multiple: bool

        :return: found properties
        :rtype: list of tuples (:class:`_Property`, match)

        :see: `_Property`
        :see: `register_property`
        :see: `register_canonical_properties`
        """
        if not string.strip():
            return []

        # search all properties
        entries = []
        for prop in self.get_properties(name):
            match = prop.compiled.match(string) if re_match else prop.compiled.search(string)
            if match:
                entries.append((prop, match))

        if validate:
            self._remove_invalid_entries(entries, string, node)

        if multiple:
            ret = entries
        else:
            ret = self._best_entries(entries)

        if sort:
            def _sorting(x):
                _, x_match = x
                x_start, x_end = x_match.span()
                # Negative length: longer matches come first.
                return (x_start - x_end)

            ret.sort(key=_sorting)

        return ret

    def _remove_invalid_entries(self, entries, string, node):
        """Remove, in place, the entries rejected by their property validator."""
        # compute entries start and ends
        entry_start = {}
        entry_end = {}
        for prop, match in entries:
            start, end = _get_span(prop, match)
            entry_start.setdefault(start, []).append(prop)
            entry_end.setdefault(end, []).append(prop)

        # remove invalid values; validators look at the neighbouring entries,
        # so validation is repeated until no entry is rejected anymore
        while True:
            invalid_entries = []
            for entry in entries:
                prop, match = entry
                if not prop.validator.validate(prop, string, node, match, entry_start, entry_end):
                    invalid_entries.append(entry)
            if not invalid_entries:
                break
            for entry in invalid_entries:
                prop, match = entry
                entries.remove(entry)
                start, end = _get_span(prop, match)
                entry_start[start].remove(prop)
                if not entry_start.get(start):
                    del entry_start[start]
                entry_end[end].remove(prop)
                if not entry_end.get(end):
                    del entry_end[end]

    def _best_entries(self, entries):
        """Keep, for every property key, only the best of the given entries.

        The best entry is the most confident one, then the longest match.
        """
        # Group by each entry's own property keys, remembering key order.
        key_order = []
        entries_by_key = {}
        for entry in entries:
            for key in entry[0].keys:
                if key not in entries_by_key:
                    entries_by_key[key] = []
                    key_order.append(key)
                entries_by_key[key].append(entry)

        ret = []
        for key in key_order:
            best_prop, best_match = None, None
            best_entry = None
            for entry in entries_by_key[key]:
                prop, match = entry
                if not best_prop or \
                   best_prop.confidence < prop.confidence or \
                   best_prop.confidence == prop.confidence and \
                   best_match.span()[1] - best_match.span()[0] < match.span()[1] - match.span()[0]:
                    best_prop, best_match = prop, match
                    best_entry = entry
            # A property with several keys may win for each of them; report it once.
            if best_entry is not None and not any(kept is best_entry for kept in ret):
                ret.append(best_entry)
        return ret

    def as_guess(self, found_properties, input=None, filter=None, sep_replacement=None, multiple=False, *args, **kwargs):
        """Convert found properties into guesses.

        :param found_properties: (property, match) tuples as returned by :meth:`find_properties`
        :param input: string the matches were found in
        :param filter: callable receiving a guess; only guesses for which it
            returns a true value are kept (all guesses when None)
        :param sep_replacement: if set, separators found in values are replaced by it
        :param multiple: if True, return the list of all accepted guesses;
            otherwise return the first accepted guess, or None
        """
        if filter is None:
            filter = lambda property, *args, **kwargs: True
        guesses = [] if multiple else None
        for prop, match in found_properties:
            # First textual property key will be used as base for effective name
            first_key = None
            for key in prop.keys:
                if isinstance(key, base_text_type):
                    first_key = key
                    break
            property_name = first_key if first_key else None
            span = _get_span(prop, match)
            guess = Guess(confidence=prop.confidence, input=input, span=span, prop=property_name)
            groups = _get_groups(match.re)
            for group_name in groups:
                # Named groups give their own name; unnamed ones use the
                # property name unless a group already carries that name.
                name = group_name if isinstance(group_name, base_text_type) else property_name if property_name not in groups else None
                if name:
                    value = self._effective_prop_value(prop, group_name, input, match.span(group_name) if group_name else match.span(), sep_replacement)
                    if value is not None:
                        is_string = isinstance(value, base_text_type)
                        if not is_string or is_string and value:  # Keep non empty strings and other defined objects
                            if isinstance(value, dict):
                                for k, v in value.items():
                                    if k is None:
                                        k = name
                                    guess[k] = v
                            else:
                                guess[name] = value
                                if group_name:
                                    # NOTE(review): metadata is looked up with the
                                    # property object, not ``name`` -- confirm
                                    # against Guess.metadata.
                                    guess.metadata(prop).span = match.span(group_name)
            if filter(guess):
                if multiple:
                    guesses.append(guess)
                else:
                    return guess
        return guesses

    def _effective_prop_value(self, prop, group_name, input=None, span=None, sep_replacement=None):
        """Compute the value a property takes for a given match.

        The canonical form wins when the property has one. Otherwise the
        value is the part of ``input`` covered by ``span`` (the whole input
        when ``span`` is None), with separators optionally replaced, passed
        through the property formatter when not empty.

        :return: the value, or None when there is no input to read from
        """
        if prop.canonical_form:
            return prop.canonical_form
        if not input:
            return None
        value = input
        if span is not None:
            value = value[span[0]:span[1]]
        if sep_replacement:
            for sep_char in sep:
                value = value.replace(sep_char, sep_replacement)
        if value:
            value = prop.format(value, group_name)
        return value

    def get_properties(self, name=None, canonical_form=None):
        """Retrieve properties

        :param name: if given, only properties having this key are returned
        :param canonical_form: if given, only properties with this canonical form are returned

        :return: Properties
        :rtype: generator
        """
        for prop in self._properties:
            if (name is None or name in prop.keys) and (canonical_form is None or prop.canonical_form == canonical_form):
                yield prop

    def get_supported_properties(self):
        """Retrieve the canonical forms known for every property key.

        :return: mapping of property key to the set of its canonical forms
        :rtype: dict
        """
        supported_properties = {}
        for prop in self.get_properties():
            for k in prop.keys:
                values = supported_properties.setdefault(k, set())
                if prop.canonical_form:
                    values.add(prop.canonical_form)
        return supported_properties
|
||||
|
||||
|
||||
class QualitiesContainer(object):
    """Registry of quality ratings, indexed by property name and value."""

    def __init__(self):
        # Maps property name -> {canonical form -> rating}
        self._qualities = {}

    def register_quality(self, name, canonical_form, rating):
        """Register a quality rating.

        :param name: Name of the property
        :type name: string
        :param canonical_form: Value of the property
        :type canonical_form: string
        :param rating: Estimated quality rating for the property
        :type rating: int
        """
        self._qualities.setdefault(name, {})[canonical_form] = rating

    def unregister_quality(self, name, *canonical_forms):
        """Unregister quality ratings for given property name.

        If canonical_forms are specified, only those values will be unregistered

        Unknown names and unknown values are ignored.

        :param name: Name of the property
        :type name: string
        :param canonical_forms: Value of the property
        :type canonical_forms: string
        """
        if not canonical_forms:
            self._qualities.pop(name, None)
            return

        property_qualities = self._qualities.get(name)
        if property_qualities is None:
            return
        for property_canonical_form in canonical_forms:
            property_qualities.pop(property_canonical_form, None)
        # Drop the property entirely once its last rating is gone.
        if not property_qualities:
            del self._qualities[name]

    def clear_qualities(self,):
        """Unregister all defined quality ratings.
        """
        self._qualities.clear()

    def rate_quality(self, guess, *props):
        """Rate the quality of guess.

        :param guess: Guess to rate
        :type guess: :class:`guessit.guess.Guess`
        :param props: Properties to include in the rating. if empty, rating will be performed for all guess properties.
        :type props: varargs of string

        :return: Quality of the guess. The higher, the better.
        :rtype: int
        """
        rate = 0
        if not props:
            props = guess.keys()
        for prop in props:
            prop_value = guess.get(prop)
            prop_qualities = self._qualities.get(prop)
            if prop_value is not None and prop_qualities is not None:
                # Values without a registered rating count for nothing.
                rate += prop_qualities.get(prop_value, 0)
        return rate

    def best_quality_properties(self, props, *guesses):
        """Retrieve the best quality guess, based on given properties

        :param props: Properties to include in the rating
        :type props: list of strings
        :param guesses: Guesses to rate
        :type guesses: :class:`guessit.guess.Guess`

        :return: Best quality guess from all passed guesses
        :rtype: :class:`guessit.guess.Guess`
        """
        best_guess = None
        best_rate = None
        for guess in guesses:
            rate = self.rate_quality(guess, *props)
            # Strict comparison: on a tie the earliest guess is kept.
            if best_rate is None or best_rate < rate:
                best_rate = rate
                best_guess = guess
        return best_guess

    def best_quality(self, *guesses):
        """Retrieve the best quality guess.

        :param guesses: Guesses to rate
        :type guesses: :class:`guessit.guess.Guess`

        :return: Best quality guess from all passed guesses
        :rtype: :class:`guessit.guess.Guess`
        """
        # Rating with no property restriction rates every guess property.
        return self.best_quality_properties((), *guesses)
|
||||
|
111
lib/guessit/country.py
Normal file
111
lib/guessit/country.py
Normal file
|
@ -0,0 +1,111 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit import UnicodeMixin, base_text_type, u
|
||||
from guessit.fileutils import load_file_in_same_dir
|
||||
import logging
|
||||
|
||||
__all__ = ['Country']

log = logging.getLogger(__name__)


# parsed from http://en.wikipedia.org/wiki/ISO_3166-1
#
# Description of the fields:
# "An English name, an alpha-2 code (when given),
# an alpha-3 code (when given), a numeric code, and an ISO 3166-2 code
# are all separated by pipe (|) characters."
_iso3166_contents = load_file_in_same_dir(__file__, 'ISO-3166-1_utf8.txt')

# One row per line of the data file, split on the pipe character.
country_matrix = [l.strip().split('|')
                  for l in _iso3166_contents.strip().split('\n')]

# Extra rows that are not part of the data file.
country_matrix += [['Unknown', 'un', 'unk', '', ''],
                   ['Latin America', '', 'lat', '', '']
                   ]

# Lookup from lower-cased English name, alpha-2 code or alpha-3 code
# (columns 0, 1 and 2) to the lower-cased alpha-3 code.
country_to_alpha3 = dict((c[0].lower(), c[2].lower()) for c in country_matrix)
country_to_alpha3.update(dict((c[1].lower(), c[2].lower()) for c in country_matrix))
country_to_alpha3.update(dict((c[2].lower(), c[2].lower()) for c in country_matrix))

# add here exceptions / non ISO representations
# Note: remember to put those exceptions in lower-case, they won't work otherwise
country_to_alpha3.update({'latinoamérica': 'lat',
                          'brazilian': 'bra',
                          'españa': 'esp',
                          'uk': 'gbr'
                          })

# Reverse lookups, keyed by lower-cased alpha-3 code.
country_alpha3_to_en_name = dict((c[2].lower(), c[0]) for c in country_matrix)
country_alpha3_to_alpha2 = dict((c[2].lower(), c[1].lower()) for c in country_matrix)
|
||||
|
||||
|
||||
class Country(UnicodeMixin):
    """This class represents a country.

    You can initialize it with pretty much anything, as it knows conversion
    from ISO-3166 2-letter and 3-letter codes, and an English name.
    """

    def __init__(self, country, strict=False):
        """
        :param country: country name or code; surrounding whitespace and
            case are ignored
        :type country: string
        :param strict: if True, an unrecognized value raises instead of
            producing the 'unk' (unknown) country
        :type strict: boolean

        :raise ValueError: if ``strict`` is set and the country is not recognized
        """
        country = u(country.strip().lower())
        self.alpha3 = country_to_alpha3.get(country)

        if self.alpha3 is None and strict:
            msg = 'The given string "%s" could not be identified as a country'
            raise ValueError(msg % country)

        if self.alpha3 is None:
            self.alpha3 = 'unk'

    @property
    def alpha2(self):
        """Lower-cased alpha-2 code of the country."""
        return country_alpha3_to_alpha2[self.alpha3]

    @property
    def english_name(self):
        """English name of the country."""
        return country_alpha3_to_en_name[self.alpha3]

    def __hash__(self):
        return hash(self.alpha3)

    def __eq__(self, other):
        """Compare with another Country, or with a string naming a country.

        A string that cannot be identified as a country is never equal.
        """
        if isinstance(other, Country):
            return self.alpha3 == other.alpha3

        if isinstance(other, base_text_type):
            try:
                # Strict parsing, so that an unrecognized string raises here
                # instead of silently comparing equal to the unknown country.
                return self == Country(other, strict=True)
            except ValueError:
                return False

        return False

    def __ne__(self, other):
        return not self == other

    def __unicode__(self):
        return self.english_name

    def __repr__(self):
        return 'Country(%s)' % self.english_name
|
146
lib/guessit/date.py
Normal file
146
lib/guessit/date.py
Normal file
|
@ -0,0 +1,146 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
import datetime
|
||||
import re
|
||||
import math
|
||||
|
||||
|
||||
# Separator allowed between the day / month / year fields.
_dsep = r'[-/ \.]'

# Date patterns, tried in this order by search_date().  Each one requires a
# non-digit character on both sides of the date.
_date_rexps = [
    # 20010823
    re.compile(r'[^0-9](?P<year>[0-9]{4})(?P<month>[0-9]{2})(?P<day>[0-9]{2})[^0-9]'),

    # 2001-08-23
    re.compile(_dsep.join([r'[^0-9](?P<year>[0-9]{4})',
                           r'(?P<month>[0-9]{2})',
                           r'(?P<day>[0-9]{2})[^0-9]'])),

    # 23-08-2001
    re.compile(_dsep.join([r'[^0-9](?P<day>[0-9]{2})',
                           r'(?P<month>[0-9]{2})',
                           r'(?P<year>[0-9]{4})[^0-9]'])),

    # 23-08-01
    re.compile(_dsep.join([r'[^0-9](?P<day>[0-9]{2})',
                           r'(?P<month>[0-9]{2})',
                           r'(?P<year>[0-9]{2})[^0-9]'])),
]
|
||||
|
||||
|
||||
def valid_year(year, today=None):
    """Tell whether ``year`` is a plausible year.

    A year is accepted when it lies strictly between 1920 and five years
    after ``today`` (the current date when ``today`` is not given).
    """
    reference = today if today else datetime.date.today()
    return year > 1920 and year < reference.year + 5
|
||||
|
||||
|
||||
def search_year(string):
    """Looks for year patterns, and if found return the year and group span.

    Assumes there are sentinels at the beginning and end of the string that
    always allow matching a non-digit delimiting the date.

    Note this only looks for valid production years, that is between 1920
    and now + 5 years, so for instance 2000 would be returned as a valid
    year but 1492 would not.

    Every 4-digit group is considered in order of appearance, so an
    implausible number occurring before the actual year does not hide it.

    >>> search_year(' in the year 2000... ')
    (2000, (13, 17))

    >>> search_year(' they arrived in 1492. ')
    (None, None)

    >>> search_year(' 1492 then 2001 ')
    (2001, (11, 15))
    """
    # Look-arounds (instead of consumed delimiters) keep the requirement of a
    # real non-digit character on each side, while allowing two candidates
    # to share a single separator character.
    for match in re.finditer(r'(?<=[^0-9])([0-9]{4})(?=[^0-9])', string):
        year = int(match.group(1))
        if valid_year(year):
            return (year, match.span(1))

    return (None, None)
|
||||
|
||||
|
||||
def search_date(string):
    """Looks for date patterns, and if found return the date and group span.

    Assumes there are sentinels at the beginning and end of the string that
    always allow matching a non-digit delimiting the date.

    Year can be defined on two digit only. It will return the nearest possible
    date from today.

    >>> search_date(' This happened on 2002-04-22. ')
    (datetime.date(2002, 4, 22), (18, 28))

    >>> search_date(' And this on 17-06-1998. ')
    (datetime.date(1998, 6, 17), (13, 23))

    >>> search_date(' no date in here ')
    (None, None)
    """
    today = datetime.date.today()
    # 2-digit years above this value are mapped to the 1900s, others to the 2000s
    century_cutoff = (today.year % 100) + 5

    for pattern in _date_rexps:
        found = re.search(pattern, string)
        if not found:
            continue

        fields = found.groupdict()
        year = int(fields['year'])
        month = int(fields['month'])
        day = int(fields['day'])

        # years specified as 2 digits should be adjusted here
        if year < 100:
            year += 1900 if year > century_cutoff else 2000

        # try the fields as captured first, then with day and month swapped
        date = None
        for candidate in ((year, month, day), (year, day, month)):
            try:
                date = datetime.date(*candidate)
            except ValueError:
                continue
            break

        # skip impossible dates and implausible years
        if date is None or not valid_year(date.year, today=today):
            continue

        # looks like we have a valid date
        # note: span is [+1,-1] because we don't want to include the
        # non-digit char
        start, end = found.span()
        return (date, (start + 1, end - 1))

    return None, None
|
87
lib/guessit/fileutils.py
Normal file
87
lib/guessit/fileutils.py
Normal file
|
@ -0,0 +1,87 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit import s, u
|
||||
import os.path
|
||||
import zipfile
|
||||
import io
|
||||
|
||||
|
||||
def split_path(path):
    r"""Break a path into its components, from the outermost folder down to
    the filename (or the last folder if a folder path was given).

    If the given path was an absolute path, the first element will always be:
     - the '/' root folder on Unix systems
     - the drive letter on Windows systems (eg: r'C:\')
     - the mount point '\\' on Windows systems (eg: r'\\host\share')

    >>> s(split_path('/usr/bin/smewt'))
    ['/', 'usr', 'bin', 'smewt']

    >>> s(split_path('relative_path/to/my_folder/'))
    ['relative_path', 'to', 'my_folder']

    """
    parts = []
    remaining = path

    while True:
        head, tail = os.path.split(remaining)

        if not (head or tail):
            return parts

        if tail:
            # regular fragment: record it and keep splitting what is left
            parts.insert(0, tail)
        elif head == remaining:
            # os.path.split() cannot reduce the path any further (root or
            # drive); record it and stop to avoid looping forever
            parts.insert(0, head)
            return parts

        # either a fragment was consumed, or we just dropped a trailing
        # separator (empty tail)
        remaining = head
|
||||
|
||||
|
||||
def file_in_same_dir(ref_file, desired_file):
    """Build the path of ``desired_file`` as a sibling of ``ref_file``.

    >>> s(file_in_same_dir('~/smewt/smewt.db', 'smewt.settings')) == os.path.normpath('~/smewt/smewt.settings')
    True

    """
    # drop the last component of the reference path, keep its folders
    folders = split_path(ref_file)[:-1]
    components = folders + [desired_file]
    return os.path.join(*components)
|
||||
|
||||
|
||||
def load_file_in_same_dir(ref_file, filename):
    """Load ``filename`` from the folder that contains ``ref_file``.

    Works even when the file is contained inside a zip: if one of the path
    components ends with ``.zip``, that component is opened as an archive
    and the rest of the path is read from inside it (returning whatever
    ``ZipFile.read`` returns).  Otherwise the file is read from disk as
    UTF-8 text and passed through ``u()``.

    Both the archive and the plain file are closed before returning.
    """
    path = split_path(ref_file)[:-1] + [filename]

    for i, p in enumerate(path):
        if p.endswith('.zip'):
            zfilename = os.path.join(*path[:i + 1])
            zfile = zipfile.ZipFile(zfilename)
            try:
                # member names inside an archive always use '/' separators
                return zfile.read('/'.join(path[i + 1:]))
            finally:
                zfile.close()

    with io.open(os.path.join(*path), encoding='utf-8') as f:
        return u(f.read())
|
452
lib/guessit/guess.py
Normal file
452
lib/guessit/guess.py
Normal file
|
@ -0,0 +1,452 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit import UnicodeMixin, s, u, base_text_type
|
||||
import json
|
||||
import datetime
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class GuessMetadata(object):
    """Confidence, input string, span and property definition of a guess.

    Any value left undefined (``None``) on this object is looked up on its
    parent, so metadata set on a single property of a Guess overrides the
    Guess-wide (global) metadata.

    :param parent: The parent metadata, used for undefined properties in self object
    :type parent: :class:`GuessMetadata`
    :param confidence: The confidence (from 0.0 to 1.0)
    :type confidence: number
    :param input: The input string
    :type input: string
    :param span: The span of the input string
    :type span: tuple (int, int)
    :param prop: The found property definition
    :type prop: :class:`guessit.containers._Property`
    """
    def __init__(self, parent=None, confidence=None, input=None, span=None, prop=None, *args, **kwargs):
        self.parent = parent
        # without a parent to inherit from, an unspecified confidence is 1.0
        if parent is None and confidence is None:
            confidence = 1.0
        self._confidence = confidence
        self._input = input
        self._span = span
        self._prop = prop

    @property
    def confidence(self):
        """The confidence

        :rtype: number
        :return: confidence value
        """
        if self._confidence is not None:
            return self._confidence
        if self.parent:
            return self.parent.confidence
        return None

    @confidence.setter
    def confidence(self, confidence):
        self._confidence = confidence

    @property
    def input(self):
        """The input

        :rtype: string
        :return: String used to find this guess value
        """
        if self._input is not None:
            return self._input
        if self.parent:
            return self.parent.input
        return None

    @property
    def span(self):
        """The span

        :rtype: tuple (int, int)
        :return: span of input string used to find this guess value
        """
        if self._span is not None:
            return self._span
        if self.parent:
            return self.parent.span
        return None

    @span.setter
    def span(self, span):
        """Set the span of input string used to find this guess value."""
        self._span = span

    @property
    def prop(self):
        """The property

        :rtype: :class:`_Property`
        :return: The property
        """
        if self._prop is not None:
            return self._prop
        if self.parent:
            return self.parent.prop
        return None

    @property
    def raw(self):
        """Return the raw information (original match from the string,
        not the cleaned version), or None when input or span is missing."""
        if not (self.input and self.span):
            return None
        start, end = self.span[0], self.span[1]
        return self.input[start:end]

    def __repr__(self, *args, **kwargs):
        return object.__repr__(self, *args, **kwargs)
|
||||
|
||||
|
||||
def _split_kwargs(**kwargs):
    """Separate metadata keyword arguments from the remaining ones.

    Every keyword whose name is listed by ``dir(GuessMetadata)`` is moved
    into the metadata dict.

    :return: tuple ``(metadata_args, remaining_kwargs)``
    """
    metadata_args = {}
    for name in dir(GuessMetadata):
        if name in kwargs:
            metadata_args[name] = kwargs.pop(name)
    return metadata_args, kwargs
|
||||
|
||||
|
||||
class Guess(UnicodeMixin, dict):
    """A Guess is a dictionary which has an associated confidence for each of
    its values.

    As it is a subclass of dict, you can use it everywhere you expect a
    simple dict."""

    def __init__(self, *args, **kwargs):
        """Build the guess like a dict.

        Keyword arguments named after a GuessMetadata attribute (see
        ``_split_kwargs``) configure the global metadata instead of becoming
        dict entries.
        """
        metadata_kwargs, kwargs = _split_kwargs(**kwargs)
        self._global_metadata = GuessMetadata(**metadata_kwargs)
        dict.__init__(self, *args, **kwargs)

        # every initial property starts with metadata inherited from the global one
        self._metadata = {}
        for prop in self:
            self._metadata[prop] = GuessMetadata(parent=self._global_metadata)

    def to_dict(self, advanced=False):
        """Return the guess as a dict containing only base types, ie:
        where dates, languages, countries, etc. are converted to strings.

        if advanced is True, each value is replaced by a dict holding the
        value under 'value', plus 'raw' and 'confidence' when they are set."""
        data = dict(self)
        for prop, value in data.items():
            if isinstance(value, datetime.date):
                data[prop] = value.isoformat()
            elif isinstance(value, (UnicodeMixin, base_text_type)):
                data[prop] = u(value)
            elif isinstance(value, list):
                data[prop] = [u(x) for x in value]
            if advanced:
                metadata = self.metadata(prop)
                prop_data = {'value': data[prop]}
                if metadata.raw:
                    prop_data['raw'] = metadata.raw
                if metadata.confidence:
                    prop_data['confidence'] = metadata.confidence
                data[prop] = prop_data

        return data

    def nice_string(self, advanced=False):
        """Return a string with the property names and their values,
        that also displays the associated confidence to each property.

        :param advanced: when true, return the JSON dump of
            ``to_dict(advanced=True)`` instead of the annotated listing
        """
        if advanced:
            data = self.to_dict(advanced)
            return json.dumps(data, indent=4)
        else:
            data = self.to_dict()

            parts = json.dumps(data, indent=4).split('\n')
            for i, p in enumerate(parts):
                # property lines of an indent=4 dump start with 4 spaces and
                # a double quote; anything else (braces, list items) is kept
                if p[:5] != '    "':
                    continue

                prop = p.split('"')[1]
                parts[i] = ('    [%.2f] "' % self.confidence(prop)) + p[5:]

            return '\n'.join(parts)

    def __unicode__(self):
        return u(self.to_dict())

    def metadata(self, prop=None):
        """Return the metadata associated with the given property name

        If no property name is given, get the global_metadata.  Metadata is
        created on the fly (inheriting from the global one) for a property
        that has none yet.
        """
        if prop is None:
            return self._global_metadata
        if prop not in self._metadata:
            self._metadata[prop] = GuessMetadata(parent=self._global_metadata)
        return self._metadata[prop]

    def confidence(self, prop=None):
        """Return the confidence of ``prop`` (global confidence if None)."""
        return self.metadata(prop).confidence

    def set_confidence(self, prop, confidence):
        """Set the confidence of ``prop`` (global confidence if None)."""
        self.metadata(prop).confidence = confidence

    def raw(self, prop):
        """Return the raw matched text of ``prop``, if known."""
        return self.metadata(prop).raw

    def set(self, prop_name, value, *args, **kwargs):
        """Set a property value, replacing its metadata with a new
        GuessMetadata built from the extra arguments."""
        self[prop_name] = value
        self._metadata[prop_name] = GuessMetadata(parent=self._global_metadata, *args, **kwargs)

    def update(self, other, confidence=None):
        """Update values from ``other``; when it is a Guess, also take over
        its per-property metadata objects.  If ``confidence`` is given it is
        applied to every updated property."""
        dict.update(self, other)
        if isinstance(other, Guess):
            for prop in other:
                try:
                    self._metadata[prop] = other._metadata[prop]
                except KeyError:
                    pass
        if confidence is not None:
            for prop in other:
                self.set_confidence(prop, confidence)

    def update_highest_confidence(self, other):
        """Update this guess with the values from the given one. In case
        there is property present in both, only the one with the highest
        confidence is kept (the current value wins ties).

        :raises ValueError: if ``other`` is not a Guess
        """
        if not isinstance(other, Guess):
            raise ValueError('Can only call this function on Guess instances')

        for prop in other:
            if prop in self and self.metadata(prop).confidence >= other.metadata(prop).confidence:
                continue
            self[prop] = other[prop]
            self._metadata[prop] = other.metadata(prop)
|
||||
|
||||
|
||||
def choose_int(g1, g2):
    """Pick between two (value, confidence) pairs holding integers.

    Used by merge_similar_guesses: equal values reinforce each other
    (confidences are combined), differing values keep the more confident
    one with the difference of both confidences.
    """
    (v1, c1), (v2, c2) = g1, g2  # value, confidence

    if v1 != v2:
        return (v1, c1 - c2) if c1 > c2 else (v2, c2 - c1)

    return (v1, 1 - (1 - c1) * (1 - c2))
|
||||
|
||||
|
||||
def choose_string(g1, g2):
    """Pick between two (value, confidence) pairs holding strings.

    Used by merge_similar_guesses.  When the 2 strings are similar, or one
    is contained in the other, a single value is returned with an increased
    confidence.  When they are dissimilar, the one with the higher
    confidence is returned, with a weaker confidence.

    Note that here, 'similar' means that 2 strings are either equal
    (ignoring case and surrounding whitespace), or that one is the other
    with the 'the' word prepended to it.

    >>> s(choose_string(('Hello', 0.75), ('World', 0.5)))
    ('Hello', 0.25)

    >>> s(choose_string(('Hello', 0.5), ('hello', 0.5)))
    ('Hello', 0.75)

    >>> s(choose_string(('Hello', 0.4), ('Hello World', 0.4)))
    ('Hello', 0.64)

    >>> s(choose_string(('simpsons', 0.5), ('The Simpsons', 0.5)))
    ('The Simpsons', 0.75)

    """
    v1, c1 = g1  # value, confidence
    v2, c2 = g2

    # an empty value never competes: the other guess is returned untouched
    if not v1:
        return g2
    if not v2:
        return g1

    v1, v2 = v1.strip(), v2.strip()
    v1l, v2l = v1.lower(), v2.lower()

    combined_prob = 1 - (1 - c1) * (1 - c2)

    # identical, or first one is the second with a leading 'the'
    if v1l == v2l or v1l == 'the ' + v2l:
        return (v1, combined_prob)
    if v2l == 'the ' + v1l:
        return (v2, combined_prob)

    # if one string is contained in the other, return the shortest one
    if v2l in v1l:
        return (v2, combined_prob)
    if v1l in v2l:
        return (v1, combined_prob)

    # in case of conflict, return the one with highest confidence
    if c1 > c2:
        return (v1, c1 - c2)
    return (v2, c2 - c1)
|
||||
|
||||
|
||||
def _merge_similar_guesses_nocheck(guesses, prop, choose):
    """Take a list of guesses and merge those which have the same properties,
    increasing or decreasing the confidence depending on whether their values
    are similar.

    Only the first two guesses containing ``prop`` are merged: the second is
    folded into the first and removed from ``guesses``.  Nothing is merged
    when they disagree on another property they both define.

    This function assumes there are at least 2 valid guesses.

    :param guesses: list of guesses, modified in place
    :param prop: name of the property to merge on
    :param choose: function taking two (value, confidence) pairs and
        returning the merged (value, confidence)
    """

    similar = [guess for guess in guesses if prop in guess]

    g1, g2 = similar[0], similar[1]

    # properties, other than the merged one, defined by both guesses
    other_props = (set(g1) & set(g2)) - set([prop])
    if other_props:
        log.debug('guess 1: %s' % g1)
        log.debug('guess 2: %s' % g2)
        # a distinct loop variable is required: reusing 'prop' here would
        # make the merge below operate on the wrong property
        for other_prop in other_props:
            if g1[other_prop] != g2[other_prop]:
                log.warning('both guesses to be merged have more than one '
                            'different property in common, bailing out...')
                return

    # merge all props of g2 into g1, updating the confidence for the
    # considered property
    v1, v2 = g1[prop], g2[prop]
    c1, c2 = g1.confidence(prop), g2.confidence(prop)

    new_value, new_confidence = choose((v1, c1), (v2, c2))
    if new_confidence >= c1:
        msg = "Updating matching property '%s' with confidence %.2f"
    else:
        msg = "Updating non-matching property '%s' with confidence %.2f"
    log.debug(msg % (prop, new_confidence))

    g2[prop] = new_value
    g2.set_confidence(prop, new_confidence)

    g1.update(g2)
    guesses.remove(g2)
|
||||
|
||||
|
||||
def merge_similar_guesses(guesses, prop, choose):
    """Merge, in place, the guesses of a list that all define ``prop``.

    The confidence of the merged property is increased or decreased
    depending on whether the merged values are similar (as decided by
    ``choose``)."""

    candidates = [guess for guess in guesses if prop in guess]
    count = len(candidates)

    if count < 2:
        # nothing to merge
        return

    if count == 2:
        _merge_similar_guesses_nocheck(guesses, prop, choose)
        return

    log.debug('complex merge, trying our best...')
    size_before = len(guesses)
    _merge_similar_guesses_nocheck(guesses, prop, choose)

    # recurse only when the previous call actually did something,
    # otherwise we end up in an infinite loop
    if len(guesses) < size_before:
        merge_similar_guesses(guesses, prop, choose)
|
||||
|
||||
|
||||
def merge_all(guesses, append=None):
    """Fold all the guesses into a single one, drop very unlikely values
    (confidence below 0.05), and return it.

    Properties listed in ``append`` are accumulated into a list instead of
    being merged; they are removed from the input guesses along the way.

    >>> s(merge_all([ Guess({'season': 2}, confidence=0.6),
    ...               Guess({'episodeNumber': 13}, confidence=0.8) ])
    ... ) == {'season': 2, 'episodeNumber': 13}
    True


    >>> s(merge_all([ Guess({'episodeNumber': 27}, confidence=0.02),
    ...               Guess({'season': 1}, confidence=0.2) ])
    ... ) == {'season': 1}
    True

    >>> s(merge_all([ Guess({'other': 'PROPER'}, confidence=0.8),
    ...               Guess({'releaseGroup': '2HD'}, confidence=0.8) ],
    ...             append=['other'])
    ... ) == {'releaseGroup': '2HD', 'other': ['PROPER']}
    True

    """
    merged = Guess()
    if not guesses:
        return merged

    appendable = [] if append is None else append

    for guess in guesses:
        # first append our appendable properties
        for name in appendable:
            if name not in guess:
                continue

            accumulated = merged.get(name, []) + [guess[name]]
            meta = guess.metadata(name)
            # TODO: what to do with confidence here? maybe an
            # arithmetic mean...
            merged.set(name, accumulated,
                       confidence=meta.confidence,
                       input=meta.input,
                       span=meta.span,
                       prop=meta.prop)

            del guess[name]

        # then merge the remaining ones
        dups = set(merged) & set(guess)
        if dups:
            log.warning('duplicate properties %s in merged result...' % [(merged[p], guess[p]) for p in dups])

        merged.update_highest_confidence(guess)

    # delete very unlikely values
    for name in list(merged.keys()):
        if merged.confidence(name) < 0.05:
            del merged[name]

    # make sure our appendable properties contain unique values
    for name in appendable:
        if name not in merged:
            continue
        value = merged[name]
        merged[name] = list(set(value)) if isinstance(value, list) else [value]

    return merged
|
67
lib/guessit/hash_ed2k.py
Normal file
67
lib/guessit/hash_ed2k.py
Normal file
|
@ -0,0 +1,67 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit import s, to_hex
|
||||
import hashlib
|
||||
import os.path
|
||||
|
||||
|
||||
def hash_file(filename):
    """Build the ed2k link (name, size and upper-cased hash) of a file.

    >>> testfile = os.path.join(os.path.dirname(__file__), 'test/dummy.srt')
    >>> s(hash_file(testfile))
    'ed2k://|file|dummy.srt|59|41F58B913AB3973F593BEBA8B8DF6510|/'
    """
    name = os.path.basename(filename)
    size = os.path.getsize(filename)
    digest = hash_filehash(filename).upper()
    return 'ed2k://|file|%s|%d|%s|/' % (name, size, digest)
|
||||
|
||||
|
||||
def hash_filehash(filename):
    """Returns the ed2k hash of a given file.

    The file is read in chunks of 9728000 bytes and each chunk is hashed
    with MD4.  A file made of a single chunk yields that chunk's digest;
    otherwise the result is the MD4 of the concatenated chunk digests.

    This function is taken from:
    http://www.radicand.org/blog/orz/2010/2/21/edonkey2000-hash-in-python/
    """
    chunk_size = 9728000

    # copying a pristine hash object gives a fresh MD4 for each use
    md4 = hashlib.new('md4').copy

    def md4_hash(data):
        m = md4()
        m.update(data)
        return m

    with open(filename, 'rb') as f:
        # iter() with a sentinel stops at the first empty read (end of file)
        hashes = [md4_hash(data).digest()
                  for data in iter(lambda: f.read(chunk_size), b'')]

    if len(hashes) == 1:
        return to_hex(hashes[0])

    # digests are bytes: join them as bytes (no text literal, no reduce())
    return md4_hash(b''.join(hashes)).hexdigest()
|
58
lib/guessit/hash_mpc.py
Normal file
58
lib/guessit/hash_mpc.py
Normal file
|
@ -0,0 +1,58 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
import struct
|
||||
import os
|
||||
|
||||
|
||||
def hash_file(filename):
    """Return the hash of a file as a 16-digit lower-case hexadecimal string.

    The hash is the file size plus the sum of the first and of the last
    65536 bytes read as native ``long long`` integers, truncated to 64 bits.

    This function is taken from:
    http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes
    and is licensed under the GPL.

    :raises Exception: if the file is smaller than 2 * 65536 bytes
    """

    longlongformat = b'q'  # long long
    bytesize = struct.calcsize(longlongformat)
    values_per_chunk = 65536 // bytesize

    # the context manager closes the file even when the size check raises
    with open(filename, "rb") as f:
        filesize = os.path.getsize(filename)
        hash_value = filesize

        if filesize < 65536 * 2:
            raise Exception("SizeError: size is %d, should be > 132K..." % filesize)

        # first 64 KiB, then last 64 KiB
        for offset in (0, max(0, filesize - 65536)):
            f.seek(offset, 0)
            for _ in range(values_per_chunk):
                buf = f.read(bytesize)
                (l_value,) = struct.unpack(longlongformat, buf)
                # to remain as 64bit number
                hash_value = (hash_value + l_value) & 0xFFFFFFFFFFFFFFFF

    return "%016x" % hash_value
|
401
lib/guessit/language.py
Normal file
401
lib/guessit/language.py
Normal file
|
@ -0,0 +1,401 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit import UnicodeMixin, base_text_type, u
|
||||
from guessit.textutils import find_words
|
||||
from babelfish import Language
|
||||
import babelfish
|
||||
import re
|
||||
import logging
|
||||
from guessit.guess import Guess
|
||||
|
||||
__all__ = ['Language', 'UNDETERMINED',
|
||||
'search_language', 'guess_language']
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
UNDETERMINED = babelfish.Language('und')
|
||||
|
||||
# Extra synonyms recognised by GuessitConverter.
# Keys are (language code, country code or None); values are the synonyms.
SYN = {
    ('und', None): ['unknown', 'inconnu', 'unk', 'un'],
    ('ell', None): ['gr', 'greek'],
    ('spa', None): ['esp', 'español'],
    ('fra', None): ['français', 'vf', 'vff', 'vfi'],
    ('swe', None): ['se'],
    ('por', 'BR'): ['po', 'pb', 'pob', 'br', 'brazilian'],
    ('cat', None): ['català'],
    ('ces', None): ['cz'],
    ('ukr', None): ['ua'],
    ('zho', None): ['cn'],
    ('jpn', None): ['jp'],
    ('hrv', None): ['scr'],
    # http://scenelingo.wordpress.com/2009/03/24/what-does-dl-mean/
    ('mul', None): ['multi', 'dl'],
}
|
||||
|
||||
|
||||
class GuessitConverter(babelfish.LanguageReverseConverter):
    """Babelfish language converter that also understands the synonyms
    declared in ``SYN`` and the 'language(country)' / 'language-country'
    notations."""

    # raw strings: '\(' is not a valid escape sequence in a regular literal
    _with_country_regexp = re.compile(r'(.*)\((.*)\)')
    _with_country_regexp2 = re.compile(r'(.*)-(.*)')

    def __init__(self):
        # lower-cased synonym -> (alpha3, country, script) with script None
        self.guessit_exceptions = {}
        for (alpha3, country), synlist in SYN.items():
            for syn in synlist:
                self.guessit_exceptions[syn.lower()] = (alpha3, country, None)

    @property
    def codes(self):
        """Union of the codes of the babelfish converters used here and of
        the guessit synonyms."""
        return (babelfish.language_converters['alpha3b'].codes |
                babelfish.language_converters['alpha2'].codes |
                babelfish.language_converters['name'].codes |
                babelfish.language_converters['opensubtitles'].codes |
                babelfish.country_converters['name'].codes |
                frozenset(self.guessit_exceptions.keys()))

    def convert(self, alpha3, country=None, script=None):
        """Return the string form of the corresponding babelfish Language."""
        return str(babelfish.Language(alpha3, country, script))

    def reverse(self, name):
        """Resolve ``name`` into an (alpha3, country, script) tuple.

        :raises babelfish.LanguageReverseError: if no converter recognises
            ``name``
        """
        with_country = (GuessitConverter._with_country_regexp.match(name) or
                        GuessitConverter._with_country_regexp2.match(name))

        if with_country:
            lang = babelfish.Language.fromguessit(with_country.group(1).strip())
            lang.country = babelfish.Country.fromguessit(with_country.group(2).strip())
            return (lang.alpha3, lang.country.alpha2 if lang.country else None, lang.script or None)

        # exceptions come first, as they need to override a potential match
        # with any of the other guessers
        try:
            return self.guessit_exceptions[name.lower()]
        except KeyError:
            pass

        for conv in [babelfish.Language,
                     babelfish.Language.fromalpha3b,
                     babelfish.Language.fromalpha2,
                     babelfish.Language.fromname,
                     babelfish.Language.fromopensubtitles]:
            try:
                c = conv(name)
                return c.alpha3, c.country, c.script
            except (ValueError, babelfish.LanguageReverseError):
                pass

        raise babelfish.LanguageReverseError(name)
|
||||
|
||||
|
||||
# Register the language converter with babelfish under the 'guessit' name.
babelfish.language_converters['guessit'] = GuessitConverter()

# Extra synonyms accepted for a country, keyed by the alpha2 code they map to.
COUNTRIES_SYN = {
    'ES': ['españa'],
    'GB': ['UK'],
    'BR': ['brazilian', 'bra'],
    # FIXME: this one is a bit of a stretch, not sure how to do
    #        it properly, though...
    'MX': ['Latinoamérica', 'latin america'],
}
|
||||
|
||||
|
||||
class GuessitCountryConverter(babelfish.CountryReverseConverter):
    """Babelfish reverse converter for country names, extended with the
    synonyms declared in ``COUNTRIES_SYN``."""

    def __init__(self):
        # lowercased synonym -> alpha2 code
        self.guessit_exceptions = {
            syn.lower(): alpha2
            for alpha2, synlist in COUNTRIES_SYN.items()
            for syn in synlist
        }

    @property
    def codes(self):
        """Union of babelfish's country names, the values of
        ``babelfish.COUNTRIES`` and the synonyms."""
        by_name = babelfish.country_converters['name'].codes
        known = frozenset(babelfish.COUNTRIES.values())
        synonyms = frozenset(self.guessit_exceptions.keys())
        return by_name | known | synonyms

    def convert(self, alpha2):
        """Return the string form of the babelfish country for ``alpha2``."""
        return str(babelfish.Country(alpha2))

    def reverse(self, name):
        """Return the alpha2 code matching ``name``.

        :raises babelfish.CountryReverseError: if ``name`` is not recognised.
        """
        # synonyms are looked up first so that they override any match the
        # regular conversions below could produce
        lowered = name.lower()
        if lowered in self.guessit_exceptions:
            return self.guessit_exceptions[lowered]

        # next, try the name as a country code
        try:
            return babelfish.Country(name.upper()).alpha2
        except ValueError:
            pass

        # finally, try it as a country name
        try:
            return babelfish.Country.fromname(name).alpha2
        except babelfish.CountryReverseError:
            pass

        raise babelfish.CountryReverseError(name)
|
||||
|
||||
|
||||
# Register the country converter with babelfish under the 'guessit' name.
babelfish.country_converters['guessit'] = GuessitCountryConverter()
|
||||
|
||||
|
||||
class Language(UnicodeMixin):
    """A human language, wrapping a ``babelfish.Language``.

    It can be built from pretty much anything: ISO-639 2-letter and 3-letter
    codes, English and French names. A language can also be tied to a
    specific country, e.g. to tell Portuguese from Brazilian Portuguese.

    Properties expose the representation needed for a given usage, such as
    ``.alpha3`` for the ISO 3-letter code or ``.opensubtitles`` for the
    OpenSubtitles language code.

    >>> Language('fr')
    Language(French)

    >>> (Language('eng').english_name) == 'English'
    True

    >>> (Language('pt(br)').country.name) == 'BRAZIL'
    True

    >>> (Language('zz', strict=False).english_name) == 'Undetermined'
    True

    >>> (Language('pt(br)').opensubtitles) == 'pob'
    True
    """

    def __init__(self, language, country=None, strict=False):
        """Identify ``language``; an explicit ``country`` overrides a guessed one.

        An unidentified string raises ``ValueError`` when ``strict`` is set,
        otherwise the language falls back to ``UNDETERMINED``.
        """
        language = u(language.strip().lower())
        if country:
            country = babelfish.Country(country.upper())
        else:
            country = None

        try:
            self.lang = babelfish.Language.fromguessit(language)
            if country:
                # the country given by the caller wins over the guessed one
                self.lang.country = country
        except babelfish.LanguageReverseError:
            msg = 'The given string "%s" could not be identified as a language' % language
            if strict:
                raise ValueError(msg)

            log.debug(msg)
            self.lang = UNDETERMINED

    @property
    def country(self):
        return self.lang.country

    @property
    def alpha2(self):
        return self.lang.alpha2

    @property
    def alpha3(self):
        return self.lang.alpha3

    @property
    def alpha3term(self):
        return self.lang.alpha3b

    @property
    def english_name(self):
        return self.lang.name

    @property
    def opensubtitles(self):
        return self.lang.opensubtitles

    @property
    def tmdb(self):
        """``alpha2``, followed by ``-<country alpha2>`` when a country is set."""
        if not self.country:
            return self.alpha2
        return '%s-%s' % (self.alpha2, self.country.alpha2)

    def __hash__(self):
        # NOTE(review): __eq__ ignores the country while this hash comes from
        # the wrapped babelfish object - confirm the two stay consistent.
        return hash(self.lang)

    def __eq__(self, other):
        if isinstance(other, Language):
            # in Guessit, two languages are equal as soon as their main
            # languages are equal
            return self.alpha3 == other.alpha3

        if not isinstance(other, base_text_type):
            return False

        # compare against the language described by the string
        try:
            return self == Language(other)
        except ValueError:
            return False

    def __ne__(self, other):
        return not (self == other)

    def __bool__(self):
        return self.lang != UNDETERMINED
    __nonzero__ = __bool__

    def __unicode__(self):
        if not self.lang.country:
            return self.english_name
        return '%s(%s)' % (self.english_name, self.country.alpha2)

    def __repr__(self):
        if not self.lang.country:
            return 'Language(%s)' % self.english_name
        return 'Language(%s, country=%s)' % (self.english_name, self.lang.country)
|
||||
|
||||
|
||||
# Words that could be read as a language, but that are far too common to be
# taken as one when found in the middle of a string (where they most likely
# carry their usual meaning).
LNG_COMMON_WORDS = frozenset((
    # english words
    'is it am mad men man run sin st to '
    'no non war min new car day bad bat fan '
    'fry cop zen gay fat one cherokee got an as '
    'cat her be hat sun may my mr rum pi '
    # french words
    'bas de le son ne ca ce et que '
    'mal est vol or mon se '
    # spanish words
    'la el del por mar '
    # other
    'ind arw ts ii bin chan ss san oss iii '
    'vi ben da lt ch '
    # new from babelfish
    'mkv avi dmd the dis cut stv des dia and '
    'cab sub mia rim las une par srt ano toy '
    'job gag reel www for ayu csi ren moi sur '
    'fer fun two big psy air '
    # release groups
    'bs'  # Bosnian
).split())

# Affixes stripped from a word before it is looked up as a language; the
# subtitle ones also mark the match as a subtitle language.
subtitle_prefixes = ['sub', 'subs', 'st', 'vost', 'subforced', 'fansub', 'hardsub']
subtitle_suffixes = ['subforced', 'fansub', 'hardsub']
lang_prefixes = ['true']
|
||||
|
||||
|
||||
def find_possible_languages(string):
    """Find possible languages in the string

    Each word of ``string`` is lowercased and stripped of the known subtitle
    prefixes/suffixes and language prefixes before being looked up. A word
    carrying a subtitle prefix or suffix is reported under the
    ``'subtitleLanguage'`` property, any other under ``'language'``.

    :return: list of tuple (property, Language, lang_word, word)
    """
    valid_words = []
    for word in find_words(string):
        lang_word = word.lower()
        key = 'language'

        for prefix in subtitle_prefixes:
            if lang_word.startswith(prefix):
                lang_word = lang_word[len(prefix):]
                key = 'subtitleLanguage'

        for suffix in subtitle_suffixes:
            if lang_word.endswith(suffix):
                # negative slice: drop the suffix, keep the leading part
                lang_word = lang_word[:-len(suffix)]
                key = 'subtitleLanguage'

        for prefix in lang_prefixes:
            if lang_word.startswith(prefix):
                lang_word = lang_word[len(prefix):]

        if lang_word in LNG_COMMON_WORDS:
            continue

        try:
            lang = Language(lang_word)
            # Keep languages with an alpha2 equivalent. Others are probably
            # an uncommon language.
            if lang == 'mul' or hasattr(lang, 'alpha2'):
                valid_words.append((key, lang, lang_word, word))
        except babelfish.Error:
            pass

    return valid_words
|
||||
|
||||
|
||||
def search_language(string, lang_filter=None):
    """Look for a language pattern in ``string``.

    The first candidate that passes the filter is returned as a ``Guess``
    carrying the language, its span in ``string`` and a confidence; ``None``
    is returned when nothing is found.

    you can specify a list of allowed languages using the lang_filter argument,
    as in lang_filter = [ 'fr', 'eng', 'spanish' ]

    >>> search_language('movie [en].avi')['language']
    Language(English)

    >>> search_language('the zen fat cat and the gay mad men got a new fan', lang_filter = ['en', 'fr', 'es'])

    """
    if lang_filter:
        lang_filter = {babelfish.Language.fromguessit(lang) for lang in lang_filter}

    for prop, language, lang, word in find_possible_languages(string):
        if lang_filter and language not in lang_filter:
            continue

        start = string.find(word)
        stop = start + len(word)

        # confidence depends on alpha2, alpha3, english name, ...
        size = len(lang)
        if size == 2:
            confidence = 0.8
        elif size == 3:
            confidence = 0.9
        elif prop == 'subtitleLanguage':
            # Subtitle prefix found with language
            confidence = 0.6
        else:
            # Note: we could either be really confident that we found a
            #       language or assume that full language names are too
            #       common words and lower their confidence accordingly;
            #       going with the low-confidence route here
            confidence = 0.3

        return Guess({prop: language}, confidence=confidence, input=string, span=(start, stop))

    return None
|
||||
|
||||
|
||||
def guess_language(text):  # pragma: no cover
    """Guess the language in which a body of text is written.

    Detection is delegated to the external ``guess-language`` python module.
    When that module cannot be imported, an error is logged and
    ``UNDETERMINED`` is returned.
    """
    try:
        from guess_language import guessLanguage as detect
        detected = detect(text)
        return babelfish.Language.fromguessit(detected)
    except ImportError:
        for line in ('Cannot detect the language of the given text body, missing dependency: guess-language',
                     'Please install it from PyPI, by doing eg: pip install guess-language'):
            log.error(line)
        return UNDETERMINED
|
247
lib/guessit/matcher.py
Normal file
247
lib/guessit/matcher.py
Normal file
|
@ -0,0 +1,247 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, \
|
||||
unicode_literals
|
||||
|
||||
import logging
|
||||
|
||||
from guessit import PY3, u
|
||||
from guessit.transfo import TransformerException
|
||||
from guessit.matchtree import MatchTree
|
||||
from guessit.textutils import normalize_unicode, clean_string
|
||||
from guessit.guess import Guess
|
||||
import inspect
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class IterativeMatcher(object):
    """An iterative matcher tries to match different patterns that appear
    in the filename.

    The ``filetype`` argument indicates which type of file you want to match.
    If it is undefined, the matcher will try to see whether it can guess
    that the file corresponds to an episode, or otherwise will assume it is
    a movie.

    The recognized ``filetype`` values are:
    ``['subtitle', 'info', 'video', 'movie', 'moviesubtitle', 'movieinfo',
    'episode', 'episodesubtitle', 'episodeinfo']``

    ``options`` is a dict of options values to be passed to the transformations used
    by the matcher.

    The IterativeMatcher works mainly in 2 steps:

    First, it splits the filename into a match_tree, which is a tree of groups
    which have a semantic meaning, such as episode number, movie title,
    etc...

    The match_tree created looks like the following::

        0000000000000000000000000000000000000000000000000000000000000000000000000000000000 111
        0000011111111111112222222222222233333333444444444444444455555555666777777778888888 000
        0000000000000000000000000000000001111112011112222333333401123334000011233340000000 000
        __________________(The.Prestige).______.[____.HP.______.{__-___}.St{__-___}.Chaps].___
        xxxxxttttttttttttt               ffffff  vvvv    xxxxxx  ll lll     xx xxx         ccc
        [XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv

    The first 3 lines indicates the group index in which a char in the
    filename is located. So for instance, ``x264`` (in the middle) is the group (0, 4, 1), and
    it corresponds to a video codec, denoted by the letter ``v`` in the 4th line.
    (for more info, see guess.matchtree.to_string)

    Second, it tries to merge all this information into a single object
    containing all the found properties, and does some (basic) conflict
    resolution when they arise.
    """
    def __init__(self, filename, options=None, **kwargs):
        """Build the match tree for ``filename`` and run every transformer on it.

        :param filename: the name to analyse; on Python 2 a byte string is
            decoded as UTF-8 (with a warning).
        :param options: dict of options handed to the transformers.
        :param kwargs: extra options; a value set in ``options`` wins over
            the keyword argument of the same name.
        :raises ValueError: if the ``type`` option is not a valid filetype.
        """
        options = dict(options or {})
        for k, v in kwargs.items():
            if k not in options or not options[k]:
                options[k] = v  # options dict has priority over keyword arguments
        self._validate_options(options)
        if not PY3 and not isinstance(filename, unicode):
            log.warning('Given filename to matcher is not unicode...')
            filename = filename.decode('utf-8')

        filename = normalize_unicode(filename)
        self.match_tree = MatchTree(filename)
        self.options = options
        self._transfo_calls = []

        # sanity check: make sure we don't process a (mostly) empty string
        if clean_string(filename) == '':
            return

        from guessit.plugins import transformers

        try:
            mtree = self.match_tree
            if 'type' in self.options:
                mtree.guess.set('type', self.options['type'], confidence=0.0)

            # Process
            for transformer in transformers.all_transformers():
                self._process(transformer, False)

            # Post-process
            for transformer in transformers.all_transformers():
                self._process(transformer, True)

            log.debug('Found match tree:\n%s' % u(mtree))
        except TransformerException as e:
            log.debug('An error has occured in Transformer %s: %s' % (e.transformer, e))

    def _process(self, transformer, post=False):
        """Run the processing (or post-processing, if ``post``) step of
        ``transformer`` and record the call.

        A transformer exposing ``should_process`` is skipped, and not
        recorded, when that method returns a false value.
        """
        if not hasattr(transformer, 'should_process') or transformer.should_process(self.match_tree, self.options):
            if post:
                transformer.post_process(self.match_tree, self.options)
            else:
                transformer.process(self.match_tree, self.options)
            self._transfo_calls.append(transformer)

    @property
    def second_pass_options(self):
        """Options requested by the transformers that ran, merged in call order."""
        second_pass_options = {}
        for transformer in self._transfo_calls:
            if hasattr(transformer, 'second_pass_options'):
                transformer_second_pass_options = transformer.second_pass_options(self.match_tree, self.options)
                if transformer_second_pass_options:
                    second_pass_options.update(transformer_second_pass_options)

        return second_pass_options

    def _validate_options(self, options):
        """Check that the ``type`` option, when set, is a known filetype.

        :raises ValueError: if ``options['type']`` is set to an unknown value.
        """
        valid_filetypes = ('subtitle', 'info', 'video',
                           'movie', 'moviesubtitle', 'movieinfo',
                           'episode', 'episodesubtitle', 'episodeinfo')

        filetype = options.get('type')
        if filetype and filetype not in valid_filetypes:
            # wrap the tuple so that it is formatted as a single value
            raise ValueError("filetype needs to be one of %s" % (valid_filetypes,))

    def matched(self):
        """Return the result of ``matched()`` on the match tree."""
        return self.match_tree.matched()
|
||||
|
||||
|
||||
def found_property(node, name, value=None, confidence=1.0, update_guess=True, logger=None):
    """Record on ``node`` a guess for the single property ``name``.

    The property value is ``value`` or, when that is None, the node's
    ``clean_value``. Without an explicit ``logger``, the ``log`` attribute of
    the caller's ``self`` is used, so the caller is then expected to be a
    method of an object exposing ``log``.

    Returns whatever ``found_guess`` returns.
    """
    if not logger:
        # frame 0 is this function, frame 1 is whoever called it
        caller_locals = inspect.stack()[1][0].f_locals
        logger = caller_locals['self'].log

    prop_value = value if value is not None else node.clean_value
    guess = Guess({name: prop_value}, confidence=confidence)
    return found_guess(node, guess, update_guess=update_guess, logger=logger)
|
||||
|
||||
|
||||
def found_guess(node, guess, update_guess=True, logger=None):
    """Attach ``guess`` to ``node``, log it, and return the node's guess.

    - a node without a guess simply takes ``guess``;
    - otherwise, with ``update_guess`` set, the node's guess is updated
      through ``update_highest_confidence``;
    - otherwise a child node covering the guess span is added to hold it.
    """
    if not node.guess:
        node.guess = guess
    elif update_guess:
        node.guess.update_highest_confidence(guess)
    else:
        holder = node.add_child(guess.metadata().span)
        holder.guess = guess

    log_found_guess(guess, logger)
    return node.guess
|
||||
|
||||
|
||||
def log_found_guess(guess, logger=None):
    """Emit one debug message per property of ``guess``, on ``logger`` when
    given and on the module logger otherwise."""
    for prop, value in guess.items():
        sink = logger or log
        message = 'Property found: %s=%s (confidence=%.2f)' % (prop, value, guess.confidence(prop))
        sink.debug(message)
|
||||
|
||||
|
||||
class GuessFinder(object):
    """Apply a guessing function to tree nodes and store what it finds.

    ``guess_func`` is called as ``guess_func(string, node)``, or as
    ``guess_func(string, node, options)`` when options are set, where
    ``string`` is the node text surrounded by one space on each side. It
    returns either a ``Guess`` (carrying its own span) or a
    ``(result, span)`` pair, or a false value when nothing was found.
    """

    def __init__(self, guess_func, confidence=None, logger=None, options=None):
        """
        :param guess_func: the guessing function, see the class docstring.
        :param confidence: confidence given to results that are not already
            ``Guess`` objects.
        :param logger: logger used to report found properties; defaults to
            the module logger.
        :param options: optional dict passed to ``guess_func``; its
            ``skip_nodes`` entry lists nodes whose span must not be matched.
        """
        self.guess_func = guess_func
        self.confidence = confidence
        self.logger = logger or log
        self.options = options

    def process_nodes(self, nodes):
        """Run ``process_node`` on each of the given nodes."""
        for node in nodes:
            self.process_node(node)

    def process_node(self, node, iterative=True, partial_span=None):
        """Look for a guess in ``node`` and attach it to the tree.

        :param iterative: when true, the node is partitioned around the match,
            the guess goes to the matching child and the other children are
            processed in turn; when false, the node's own guess is updated.
        :param partial_span: restrict the search to this span, relative to
            the node value.
        """
        if partial_span:
            value = node.value[partial_span[0]:partial_span[1]]
        else:
            value = node.value
        string = ' %s ' % value  # add sentinels

        if not self.options:
            matcher_result = self.guess_func(string, node)
        else:
            matcher_result = self.guess_func(string, node, self.options)

        if not matcher_result:
            return

        if not isinstance(matcher_result, Guess):
            result, span = matcher_result
        else:
            result, span = matcher_result, matcher_result.metadata().span

        if not result:
            return

        # readjust span to compensate for sentinels
        span = (span[0] - 1, span[1] - 1)

        # readjust span to compensate for partial_span
        if partial_span:
            span = (span[0] + partial_span[0], span[1] + partial_span[0])

        partition_spans = None
        if self.options and 'skip_nodes' in self.options:
            skip_nodes = self.options.get('skip_nodes')
            for skip_node in skip_nodes:
                if skip_node.parent.node_idx == node.node_idx[:len(skip_node.parent.node_idx)] and\
                        skip_node.span == span:
                    # the match falls on a node to skip: search again in
                    # what is left of this node around it
                    partition_spans = node.get_partition_spans(skip_node.span)
                    partition_spans.remove(skip_node.span)
                    break

        if partition_spans:
            for partition_span in partition_spans:
                self.process_node(node, partial_span=partition_span)
            return

        if isinstance(result, Guess):
            guess = result
        else:
            guess = Guess(result, confidence=self.confidence, input=string, span=span)

        if not iterative:
            node.guess.update(guess)
            return

        absolute_span = (span[0] + node.offset, span[1] + node.offset)
        node.partition(span)
        found_child = None
        for child in node.children:
            if child.span == absolute_span:
                # pass the logger by keyword: the third positional parameter
                # of found_guess is update_guess, not the logger
                found_guess(child, guess, logger=self.logger)
                found_child = child
                break
        for child in node.children:
            if child is not found_child:
                self.process_node(child)
|
439
lib/guessit/matchtree.py
Normal file
439
lib/guessit/matchtree.py
Normal file
|
@ -0,0 +1,439 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
import guessit # @UnusedImport needed for doctests
|
||||
from guessit import UnicodeMixin, base_text_type
|
||||
from guessit.textutils import clean_string, str_fill
|
||||
from guessit.patterns import group_delimiters
|
||||
from guessit.guess import (merge_similar_guesses, merge_all,
|
||||
choose_int, choose_string, Guess)
|
||||
import copy
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BaseMatchTree(UnicodeMixin):
|
||||
"""A BaseMatchTree is a tree covering the filename, where each
|
||||
node represents a substring in the filename and can have a ``Guess``
|
||||
associated with it that contains the information that has been guessed
|
||||
in this node. Nodes can be further split into subnodes until a proper
|
||||
split has been found.
|
||||
|
||||
Each node has the following attributes:
|
||||
- string = the original string of which this node represents a region
|
||||
- span = a pair of (begin, end) indices delimiting the substring
|
||||
- parent = parent node
|
||||
- children = list of children nodes
|
||||
- guess = Guess()
|
||||
|
||||
BaseMatchTrees are displayed in the following way:
|
||||
|
||||
>>> path = 'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv'
|
||||
>>> print(guessit.IterativeMatcher(path).match_tree)
|
||||
000000 1111111111111111 2222222222222222222222222222222222222222222 333
|
||||
000000 0000000000111111 0000000000111111222222222222222222222222222 000
|
||||
011112 011112000011111222222222222222222 000
|
||||
011112222222222222
|
||||
0000011112222
|
||||
01112 0111
|
||||
Movies/__________(____)/Dark.City.(____).DC._____.____.___.____-___.___
|
||||
tttttttttt yyyy yyyy fffff ssss aaa vvvv rrr ccc
|
||||
Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv
|
||||
|
||||
The last line contains the filename, which you can use a reference.
|
||||
The previous line contains the type of property that has been found.
|
||||
The line before that contains the filename, where all the found groups
|
||||
have been blanked. Basically, what is left on this line are the leftover
|
||||
groups which could not be identified.
|
||||
|
||||
The lines before that indicate the indices of the groups in the tree.
|
||||
|
||||
For instance, the part of the filename 'BDRip' is the leaf with index
|
||||
``(2, 2, 1)`` (read from top to bottom), and its meaning is 'format'
|
||||
(as shown by the ``f``'s on the last-but-one line).
|
||||
"""
|
||||
|
||||
def __init__(self, string='', span=None, parent=None):
|
||||
self.string = string
|
||||
self.span = span or (0, len(string))
|
||||
self.parent = parent
|
||||
self.children = []
|
||||
self.guess = Guess()
|
||||
|
||||
@property
|
||||
def value(self):
|
||||
"""Return the substring that this node matches."""
|
||||
return self.string[self.span[0]:self.span[1]]
|
||||
|
||||
@property
|
||||
def clean_value(self):
|
||||
"""Return a cleaned value of the matched substring, with better
|
||||
presentation formatting (punctuation marks removed, duplicate
|
||||
spaces, ...)"""
|
||||
return clean_string(self.value)
|
||||
|
||||
@property
|
||||
def offset(self):
|
||||
return self.span[0]
|
||||
|
||||
@property
|
||||
def info(self):
|
||||
"""Return a dict containing all the info guessed by this node,
|
||||
subnodes included."""
|
||||
result = dict(self.guess)
|
||||
|
||||
for c in self.children:
|
||||
result.update(c.info)
|
||||
|
||||
return result
|
||||
|
||||
@property
|
||||
def root(self):
|
||||
"""Return the root node of the tree."""
|
||||
if not self.parent:
|
||||
return self
|
||||
|
||||
return self.parent.root
|
||||
|
||||
@property
|
||||
def depth(self):
|
||||
"""Return the depth of this node."""
|
||||
if self.is_leaf():
|
||||
return 0
|
||||
|
||||
return 1 + max(c.depth for c in self.children)
|
||||
|
||||
def is_leaf(self):
|
||||
"""Return whether this node is a leaf or not."""
|
||||
return self.children == []
|
||||
|
||||
def add_child(self, span):
|
||||
"""Add a new child node to this node with the given span."""
|
||||
child = MatchTree(self.string, span=span, parent=self)
|
||||
self.children.append(child)
|
||||
return child
|
||||
|
||||
def get_partition_spans(self, indices):
|
||||
"""Return the list of absolute spans for the regions of the original
|
||||
string defined by splitting this node at the given indices (relative
|
||||
to this node)"""
|
||||
indices = sorted(indices)
|
||||
if indices[0] != 0:
|
||||
indices.insert(0, 0)
|
||||
if indices[-1] != len(self.value):
|
||||
indices.append(len(self.value))
|
||||
|
||||
spans = []
|
||||
for start, end in zip(indices[:-1], indices[1:]):
|
||||
spans.append((self.offset + start,
|
||||
self.offset + end))
|
||||
return spans
|
||||
|
||||
def partition(self, indices):
|
||||
"""Partition this node by splitting it at the given indices,
|
||||
relative to this node."""
|
||||
for partition_span in self.get_partition_spans(indices):
|
||||
self.add_child(span=partition_span)
|
||||
|
||||
def split_on_components(self, components):
|
||||
offset = 0
|
||||
for c in components:
|
||||
start = self.value.find(c, offset)
|
||||
end = start + len(c)
|
||||
self.add_child(span=(self.offset + start,
|
||||
self.offset + end))
|
||||
offset = end
|
||||
|
||||
def nodes_at_depth(self, depth):
|
||||
"""Return all the nodes at a given depth in the tree"""
|
||||
if depth == 0:
|
||||
yield self
|
||||
|
||||
for child in self.children:
|
||||
for node in child.nodes_at_depth(depth - 1):
|
||||
yield node
|
||||
|
||||
@property
|
||||
def node_idx(self):
|
||||
"""Return this node's index in the tree, as a tuple.
|
||||
If this node is the root of the tree, then return ()."""
|
||||
if self.parent is None:
|
||||
return ()
|
||||
return self.parent.node_idx + (self.parent.children.index(self),)
|
||||
|
||||
def node_at(self, idx):
|
||||
"""Return the node at the given index in the subtree rooted at
|
||||
this node."""
|
||||
if not idx:
|
||||
return self
|
||||
|
||||
try:
|
||||
return self.children[idx[0]].node_at(idx[1:])
|
||||
except IndexError:
|
||||
raise ValueError('Non-existent node index: %s' % (idx,))
|
||||
|
||||
def nodes(self):
|
||||
"""Return all the nodes and subnodes in this tree."""
|
||||
yield self
|
||||
for child in self.children:
|
||||
for node in child.nodes():
|
||||
yield node
|
||||
|
||||
def _leaves(self):
|
||||
"""Return a generator over all the nodes that are leaves."""
|
||||
if self.is_leaf():
|
||||
yield self
|
||||
else:
|
||||
for child in self.children:
|
||||
# pylint: disable=W0212
|
||||
for leaf in child._leaves():
|
||||
yield leaf
|
||||
|
||||
def group_node(self):
|
||||
return self._other_group_node(0)
|
||||
|
||||
def previous_group_node(self):
|
||||
return self._other_group_node(-1)
|
||||
|
||||
def next_group_node(self):
|
||||
return self._other_group_node(+1)
|
||||
|
||||
def _other_group_node(self, offset):
|
||||
if len(self.node_idx) > 1:
|
||||
group_idx = self.node_idx[:2]
|
||||
if group_idx[1] + offset >= 0:
|
||||
other_group_idx = (group_idx[0], group_idx[1] + offset)
|
||||
try:
|
||||
other_group_node = self.root.node_at(other_group_idx)
|
||||
return other_group_node
|
||||
except ValueError:
|
||||
pass
|
||||
return None
|
||||
|
||||
def leaves(self):
|
||||
"""Return a list of all the nodes that are leaves."""
|
||||
return list(self._leaves())
|
||||
|
||||
def previous_leaf(self, leaf):
|
||||
"""Return previous leaf for this node"""
|
||||
return self._other_leaf(leaf, -1)
|
||||
|
||||
def next_leaf(self, leaf):
|
||||
"""Return next leaf for this node"""
|
||||
return self._other_leaf(leaf, +1)
|
||||
|
||||
def _other_leaf(self, leaf, offset):
|
||||
leaves = self.leaves()
|
||||
index = leaves.index(leaf) + offset
|
||||
if index > 0 and index < len(leaves):
|
||||
return leaves[index]
|
||||
return None
|
||||
|
||||
def previous_leaves(self, leaf):
|
||||
"""Return previous leaves for this node"""
|
||||
leaves = self.leaves()
|
||||
index = leaves.index(leaf)
|
||||
if index > 0 and index < len(leaves):
|
||||
previous_leaves = leaves[:index]
|
||||
previous_leaves.reverse()
|
||||
return previous_leaves
|
||||
return []
|
||||
|
||||
def next_leaves(self, leaf):
|
||||
"""Return next leaves for this node"""
|
||||
leaves = self.leaves()
|
||||
index = leaves.index(leaf)
|
||||
if index > 0 and index < len(leaves):
|
||||
return leaves[index + 1:len(leaves)]
|
||||
return []
|
||||
|
||||
def to_string(self):
|
||||
"""Return a readable string representation of this tree.
|
||||
|
||||
The result is a multi-line string, where the lines are:
|
||||
- line 1 -> N-2: each line contains the nodes at the given depth in the tree
|
||||
- line N-2: original string where all the found groups have been blanked
|
||||
- line N-1: type of property that has been found
|
||||
- line N: the original string, which you can use a reference.
|
||||
"""
|
||||
empty_line = ' ' * len(self.string)
|
||||
|
||||
def to_hex(x):
|
||||
if isinstance(x, int):
|
||||
return str(x) if x < 10 else chr(55 + x)
|
||||
return x
|
||||
|
||||
def meaning(result):
|
||||
mmap = {'episodeNumber': 'E',
|
||||
'season': 'S',
|
||||
'extension': 'e',
|
||||
'format': 'f',
|
||||
'language': 'l',
|
||||
'country': 'C',
|
||||
'videoCodec': 'v',
|
||||
'videoProfile': 'v',
|
||||
'audioCodec': 'a',
|
||||
'audioProfile': 'a',
|
||||
'audioChannels': 'a',
|
||||
'website': 'w',
|
||||
'container': 'c',
|
||||
'series': 'T',
|
||||
'title': 't',
|
||||
'date': 'd',
|
||||
'year': 'y',
|
||||
'releaseGroup': 'r',
|
||||
'screenSize': 's',
|
||||
'other': 'o'
|
||||
}
|
||||
|
||||
if result is None:
|
||||
return ' '
|
||||
|
||||
for prop, l in mmap.items():
|
||||
if prop in result:
|
||||
return l
|
||||
|
||||
return 'x'
|
||||
|
||||
lines = [empty_line] * (self.depth + 2) # +2: remaining, meaning
|
||||
lines[-2] = self.string
|
||||
|
||||
for node in self.nodes():
|
||||
if node == self:
|
||||
continue
|
||||
|
||||
idx = node.node_idx
|
||||
depth = len(idx) - 1
|
||||
if idx:
|
||||
lines[depth] = str_fill(lines[depth], node.span,
|
||||
to_hex(idx[-1]))
|
||||
if node.guess:
|
||||
lines[-2] = str_fill(lines[-2], node.span, '_')
|
||||
lines[-1] = str_fill(lines[-1], node.span, meaning(node.guess))
|
||||
|
||||
lines.append(self.string)
|
||||
|
||||
return '\n'.join(l.rstrip() for l in lines)
|
||||
|
||||
def __unicode__(self):
    """Return the text form of the tree, as produced by to_string()."""
    rendered = self.to_string()
    return rendered
|
||||
|
||||
def __repr__(self):
    """Return a short debugging representation showing the root value."""
    root_value = self.value
    return '<MatchTree: root=%s>' % root_value
|
||||
|
||||
|
||||
class MatchTree(BaseMatchTree):
    """The MatchTree contains a few "utility" methods which are not necessary
    for the BaseMatchTree, but add a lot of convenience for writing
    higher-level rules.
    """

    # cache for matched(); None means "not computed yet"
    _matched_result = None

    def _unidentified_leaves(self,
                             valid=lambda leaf: len(leaf.clean_value) >= 2):
        """Yield the leaves that hold no guess and satisfy ``valid``."""
        for leaf in self._leaves():
            if not leaf.guess and valid(leaf):
                yield leaf

    def unidentified_leaves(self,
                            valid=lambda leaf: len(leaf.clean_value) >= 2):
        """Return a list of the leaves that hold no guess and satisfy
        ``valid`` (by default: a clean value of at least 2 characters)."""
        return list(self._unidentified_leaves(valid))

    def _leaves_containing(self, property_name):
        """Yield each leaf whose guess contains the given property.

        ``property_name`` is either a single name or an iterable of names;
        a leaf is yielded once as soon as one of the names is found.
        """
        if isinstance(property_name, base_text_type):
            property_name = [property_name]

        for leaf in self._leaves():
            for prop in property_name:
                if prop in leaf.guess:
                    yield leaf
                    break

    def leaves_containing(self, property_name):
        """Return a list of leaves that guessed the given property."""
        return list(self._leaves_containing(property_name))

    def first_leaf_containing(self, property_name):
        """Return the first leaf containing the given property, or None
        if there is no such leaf."""
        return next(self._leaves_containing(property_name), None)

    def _previous_unidentified_leaves(self, node):
        """Yield the unidentified leaves whose node_idx is lower than the
        one of the given node."""
        node_idx = node.node_idx
        for leaf in self._unidentified_leaves():
            if leaf.node_idx < node_idx:
                yield leaf

    def previous_unidentified_leaves(self, node):
        """Return a list of non-empty leaves that are before the given
        node (in the string)."""
        return list(self._previous_unidentified_leaves(node))

    def _previous_leaves_containing(self, node, property_name):
        """Yield the leaves containing the given property whose node_idx is
        lower than the one of the given node."""
        node_idx = node.node_idx
        for leaf in self._leaves_containing(property_name):
            if leaf.node_idx < node_idx:
                yield leaf

    def previous_leaves_containing(self, node, property_name):
        """Return a list of leaves containing the given property that are
        before the given node (in the string)."""
        return list(self._previous_leaves_containing(node, property_name))

    def is_explicit(self):
        """Return whether the group was explicitly enclosed by
        parentheses/square brackets/etc."""
        value = self.value
        if not value:
            # nothing to look at: avoid indexing into an empty value
            return False
        return (value[0] + value[-1]) in group_delimiters

    def matched(self):
        """Return a single guess that contains all the info found in the
        nodes of this tree, trying to merge properties as good as possible.

        The result is computed on the first call and cached afterwards.
        """
        # compare against None: a computed result that happens to be falsy
        # must not trigger a new merge on every call
        if self._matched_result is None:
            # we need to make a copy here, as the merge functions work in place and
            # calling them on the match tree would modify it
            parts = [copy.copy(node.guess) for node in self.nodes() if node.guess]

            # 1- try to merge similar information together and give it a higher
            #    confidence
            for int_part in ('year', 'season', 'episodeNumber'):
                merge_similar_guesses(parts, int_part, choose_int)

            for string_part in ('title', 'series', 'container', 'format',
                                'releaseGroup', 'website', 'audioCodec',
                                'videoCodec', 'screenSize', 'episodeFormat',
                                'audioChannels', 'idNumber'):
                merge_similar_guesses(parts, string_part, choose_string)

            # 2- merge the rest, potentially discarding information not properly
            #    merged before
            result = merge_all(parts,
                               append=['language', 'subtitleLanguage', 'other', 'special'])

            log.debug('Final result: ' + result.nice_string())
            self._matched_result = result

        return self._matched_result
|
25
lib/guessit/options.py
Normal file
25
lib/guessit/options.py
Normal file
|
@ -0,0 +1,25 @@
|
|||
from optparse import OptionParser
|
||||
|
||||
option_parser = OptionParser(usage='usage: %prog [options] file1 [file2...]')
|
||||
option_parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
|
||||
help='display debug output')
|
||||
option_parser.add_option('-p', '--properties', dest='properties', action='store_true', default=False,
|
||||
help='Display properties that can be guessed.')
|
||||
option_parser.add_option('-l', '--values', dest='values', action='store_true', default=False,
|
||||
help='Display property values that can be guessed.')
|
||||
option_parser.add_option('-s', '--transformers', dest='transformers', action='store_true', default=False,
|
||||
help='Display transformers that can be used.')
|
||||
option_parser.add_option('-i', '--info', dest='info', default='filename',
|
||||
help='the desired information type: filename, hash_mpc or a hash from python\'s '
|
||||
'hashlib module, such as hash_md5, hash_sha1, ...; or a list of any of '
|
||||
'them, comma-separated')
|
||||
option_parser.add_option('-n', '--name-only', dest='name_only', action='store_true', default=False,
|
||||
help='Parse files as name only. Disable folder parsing, extension parsing, and file content analysis.')
|
||||
option_parser.add_option('-t', '--type', dest='type', default=None,
|
||||
help='the suggested file type: movie, episode. If undefined, type will be guessed.')
|
||||
option_parser.add_option('-a', '--advanced', dest='advanced', action='store_true', default=False,
|
||||
help='display advanced information for filename guesses, as json output')
|
||||
option_parser.add_option('-y', '--yaml', dest='yaml', action='store_true', default=False,
|
||||
help='display information for filename guesses as yaml output (like unit-test)')
|
||||
option_parser.add_option('-d', '--demo', action='store_true', dest='demo', default=False,
|
||||
help='run a few builtin tests instead of analyzing a file')
|
77
lib/guessit/patterns/__init__.py
Normal file
77
lib/guessit/patterns/__init__.py
Normal file
|
@ -0,0 +1,77 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from guessit import base_text_type
|
||||
|
||||
# opening/closing delimiter pairs, each written as a two-character string
group_delimiters = ['()', '[]', '{}']

# separator character regexp
sep = r'[][,)(}:{+ /\._-]'  # regexp art, hehe :D

# enhance_pattern() replaces every occurrence of _dash with _psep.
# _psep is a raw string so that '\W' reaches the regexp engine untouched
# instead of relying on an invalid string escape.
_dash = '-'
_psep = r'[\W_]?'
|
||||
|
||||
|
||||
def build_or_pattern(patterns):
    """Build a or pattern string from a list of possible patterns

    Every pattern is wrapped in its own non-capturing group and the groups
    are joined with '|' inside an enclosing non-capturing group, e.g.
    ['a', 'b'] gives '(?:(?:a)|(?:b))'.

    :param patterns: Patterns to combine (regexps).
    :type patterns: iterable of strings

    :return: The or pattern. When no pattern is given, '(?!)' is returned,
        a regexp that never matches.
    :rtype: string
    """
    alternatives = ['(?:%s)' % pattern for pattern in patterns]
    if not alternatives:
        # an alternation of nothing matches nothing; without this guard the
        # result would be a lone ')' which does not even compile
        return '(?!)'
    return '(?:' + '|'.join(alternatives) + ')'
|
||||
|
||||
|
||||
def compile_pattern(pattern, enhance=True):
    """Compile and enhance a pattern

    :param pattern: Pattern to compile (regexp).
    :type pattern: string

    :param enhance: Enhance pattern (see enhance_pattern) before compiling.
    :type enhance: boolean

    :return: The compiled pattern, which ignores case
    :rtype: regular expression object
    """
    if enhance:
        pattern = enhance_pattern(pattern)
    return re.compile(pattern, re.IGNORECASE)
|
||||
|
||||
|
||||
def enhance_pattern(pattern):
    r"""Enhance pattern to match more equivalent values.

    '-' are replaced by '[\W_]?', which matches more types of separators (or none)

    :param pattern: Pattern to enhance (regexp).
    :type pattern: string

    :return: The enhanced pattern
    :rtype: string
    """
    # _dash and _psep are the module-level constants holding the literal to
    # look for and its replacement
    return pattern.replace(_dash, _psep)
|
32
lib/guessit/patterns/extension.py
Normal file
32
lib/guessit/patterns/extension.py
Normal file
|
@ -0,0 +1,32 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
|
||||
# Copyright (c) 2011 Ricard Marxer <ricardmp@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
# File extensions (without the leading dot), grouped by kind of file.
subtitle_exts = [
    'srt', 'idx', 'sub', 'ssa',
]

info_exts = [
    'nfo',
]

video_exts = [
    '3g2', '3gp', '3gp2',
    'asf', 'avi',
    'divx',
    'flv',
    'm4v', 'mk2', 'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg',
    'ogg', 'ogm', 'ogv',
    'qt',
    'ra', 'ram', 'rm',
    'ts',
    'wav', 'webm', 'wma', 'wmv',
    'iso',
]
|
150
lib/guessit/patterns/numeral.py
Normal file
150
lib/guessit/patterns/numeral.py
Normal file
|
@ -0,0 +1,150 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
# regexp fragment: 1 to 3 digits (raw string, so '\d' is not treated as a
# string escape)
digital_numeral = r'\d{1,3}'

# regexp fragment: roman numeral; the leading lookahead requires at least
# one roman digit so that the otherwise all-optional body cannot match empty
roman_numeral = "(?=[MCDLXVI]+)M{0,4}(?:CM|CD|D?C{0,3})(?:XC|XL|L?X{0,3})(?:IX|IV|V?I{0,3})"

# Number words; the position of a word in its list is the value it stands for.
english_word_numeral_list = [
    'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten',
    'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen', 'twenty'
]

french_word_numeral_list = [
    'zéro', 'un', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
    'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dix-sept', 'dix-huit', 'dix-neuf', 'vingt'
]

# alternative french spellings (no accent, feminine 'une', no hyphens)
french_alt_word_numeral_list = [
    'zero', 'une', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
    'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dixsept', 'dixhuit', 'dixneuf', 'vingt'
]
|
||||
|
||||
|
||||
def __build_word_numeral(*args, **kwargs):
    """Build a regexp alternation out of lists of number words.

    The words of all the lists are joined with '|', in the order given,
    inside a non-capturing group that starts with a "one or more word
    characters" lookahead.

    :param args: Lists of words to include in the alternation.
    :param kwargs: Accepted for compatibility; not used.

    :return: The regexp fragment
    :rtype: string

    :raises ValueError: if the lists contain no word at all
    """
    words = [word for word_list in args for word in word_list]
    if not words:
        raise ValueError('no numeral word to build a pattern from')
    # the local is deliberately not called ``re`` so the imported module is
    # not shadowed; raw string keeps '\w' out of string-escape processing
    return r'(?:(?=\w+)' + '|'.join(words) + ')'
|
||||
|
||||
# regexp fragment matching any of the known number words
word_numeral = __build_word_numeral(
    english_word_numeral_list,
    french_word_numeral_list,
    french_alt_word_numeral_list
)

# regexp fragment matching a numeral written with digits, as a roman
# numeral or as a number word
numeral = '(?:' + '|'.join((digital_numeral, roman_numeral, word_numeral)) + ')'

# (symbol, value) pairs used by __parse_roman, from the largest value down
__romanNumeralMap = (
    ('M', 1000), ('CM', 900),
    ('D', 500), ('CD', 400),
    ('C', 100), ('XC', 90),
    ('L', 50), ('XL', 40),
    ('X', 10), ('IX', 9),
    ('V', 5), ('IV', 4),
    ('I', 1)
)

# roman_numeral anchored at both ends, to validate a whole string
__romanNumeralPattern = re.compile('^%s$' % roman_numeral)
|
||||
|
||||
|
||||
def __parse_roman(value):
    """Convert a Roman numeral to an integer.

    :param value: Roman numeral, checked against __romanNumeralPattern.
    :type value: string

    :return: The value of the numeral
    :rtype: int

    :raises ValueError: if value is not matched by __romanNumeralPattern
    """
    if __romanNumeralPattern.search(value) is None:
        raise ValueError('Invalid Roman numeral: %s' % value)

    total = 0
    position = 0
    # consume the string left to right, taking each symbol of the map as
    # many times as it appears at the current position
    for symbol, amount in __romanNumeralMap:
        width = len(symbol)
        while value[position:position + width] == symbol:
            total += amount
            position += width
    return total
|
||||
|
||||
|
||||
def __parse_word(value):
    """Convert a number word to an integer.

    The word is looked up in the english, french and alternative french
    lists, in that order; its position in the first list holding it is the
    result.

    :raises ValueError: if the word is in none of the lists
    """
    known_lists = (english_word_numeral_list,
                   french_word_numeral_list,
                   french_alt_word_numeral_list)
    for word_list in known_lists:
        if value in word_list:
            return word_list.index(value)
    raise ValueError
|
||||
|
||||
|
||||
# first run of digits of a string, with optional non-digits around it
_clean_re = re.compile(r'[^\d]*(\d+)[^\d]*')


def _first_parsed(parser, value, clean):
    """Return parser(candidate) for the first candidate it accepts, else None.

    With ``clean`` the candidates are the whitespace-separated words of
    value followed by value itself; without it, value alone.
    """
    candidates = value.split() if clean else []
    candidates.append(value)
    for candidate in candidates:
        try:
            return parser(candidate)
        except ValueError:
            continue
    return None


def parse_numeral(value, int_enabled=True, roman_enabled=True, word_enabled=True, clean=True):
    """Parse a numeric value into integer.

    input can be an integer as a string, a roman numeral or a word. The
    enabled parsers are tried in that order and the first success wins.

    :param value: Value to parse. Can be an integer, roman numeral or word.
    :type value: string

    :param int_enabled: Try to parse value as an integer.
    :type int_enabled: boolean

    :param roman_enabled: Try to parse value as a roman numeral.
    :type roman_enabled: boolean

    :param word_enabled: Try to parse value as a number word.
    :type word_enabled: boolean

    :param clean: Tolerate surrounding text: the integer parser uses the
        first run of digits found, the roman and word parsers try each
        whitespace-separated word before the whole value.
    :type clean: boolean

    :return: Numeric value
    :rtype: int

    :raises ValueError: if none of the enabled parsers can parse value
    """
    if int_enabled:
        try:
            if clean:
                match = _clean_re.match(value)
                if match:
                    return int(match.group(1))
            return int(value)
        except ValueError:
            pass

    if roman_enabled:
        parsed = _first_parsed(__parse_roman, value, clean)
        if parsed is not None:
            return parsed

    if word_enabled:
        # 'zero' parses to 0, hence the explicit comparison with None
        parsed = _first_parsed(__parse_word, value, clean)
        if parsed is not None:
            return parsed

    raise ValueError('Invalid numeral: ' + value)
|
21
lib/guessit/plugins/__init__.py
Normal file
21
lib/guessit/plugins/__init__.py
Normal file
|
@ -0,0 +1,21 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
186
lib/guessit/plugins/transformers.py
Normal file
186
lib/guessit/plugins/transformers.py
Normal file
|
@ -0,0 +1,186 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from stevedore import ExtensionManager
|
||||
from pkg_resources import EntryPoint
|
||||
|
||||
from stevedore.extension import Extension
|
||||
from logging import getLogger
|
||||
|
||||
log = getLogger(__name__)
|
||||
|
||||
|
||||
class Transformer(object):  # pragma: no cover
    """Base class for transformers; every hook has a neutral default."""

    def __init__(self, priority=0):
        self.priority = priority
        # one logger per transformer, named after the concrete class
        self.log = getLogger(self.name)

    @property
    def name(self):
        """Name of the transformer: the name of its class."""
        return type(self).__name__

    def supported_properties(self):
        """Return the properties this transformer supports (none by default)."""
        return {}

    def second_pass_options(self, mtree, options=None):
        """Return the options for a second pass (None by default)."""
        return None

    def should_process(self, mtree, options=None):
        """Return whether this transformer should process the tree (True by default)."""
        return True

    def process(self, mtree, options=None):
        """Process the match tree (does nothing by default)."""

    def post_process(self, mtree, options=None):
        """Post-process the match tree (does nothing by default)."""

    def rate_quality(self, guess, *props):
        """Return the quality rate of the guess (0 by default)."""
        return 0
|
||||
|
||||
|
||||
class CustomTransformerExtensionManager(ExtensionManager):
    """Extension manager that keeps transformers ordered by priority."""

    def __init__(self, namespace='guessit.transformer', invoke_on_load=True,
                 invoke_args=(), invoke_kwds=None, propagate_map_exceptions=True,
                 on_load_failure_callback=None, verify_requirements=False):
        # a fresh dict per call instead of a shared mutable default
        if invoke_kwds is None:
            invoke_kwds = {}
        super(CustomTransformerExtensionManager, self).__init__(
            namespace=namespace,
            invoke_on_load=invoke_on_load,
            invoke_args=invoke_args,
            invoke_kwds=invoke_kwds,
            propagate_map_exceptions=propagate_map_exceptions,
            on_load_failure_callback=on_load_failure_callback,
            verify_requirements=verify_requirements)

    def order_extensions(self, extensions):
        """Order the loaded transformers

        The list is sorted in place by decreasing ``obj.priority`` and
        returned. Priorities should be chosen to follow those rules
          - website before language (eg: tvu.org.ru vs russian)
          - language before episodes_rexps
          - properties before language (eg: he-aac vs hebrew)
          - release_group before properties (eg: XviD-?? vs xvid)
        """
        extensions.sort(key=lambda ext: -ext.obj.priority)
        return extensions

    def _load_one_plugin(self, ep, invoke_on_load, invoke_args, invoke_kwds, verify_requirements):
        """Load the plugin behind an entry point and wrap it in an Extension.

        Requirements are never verified for an entry point without a
        distribution. The plugin is instantiated only with invoke_on_load.
        """
        if not ep.dist:
            plugin = ep.load(require=False)
        else:
            plugin = ep.load(require=verify_requirements)
        if invoke_on_load:
            obj = plugin(*invoke_args, **invoke_kwds)
        else:
            obj = None
        return Extension(ep.name, ep, plugin, obj)

    def _load_plugins(self, invoke_on_load, invoke_args, invoke_kwds, verify_requirements):
        """Load the plugins through the parent class, then order them."""
        loaded = super(CustomTransformerExtensionManager, self)._load_plugins(
            invoke_on_load, invoke_args, invoke_kwds, verify_requirements)
        return self.order_extensions(loaded)

    def objects(self):
        """Return the ``obj`` of every extension, through map()."""
        return self.map(self._get_obj)

    def _get_obj(self, ext):
        return ext.obj

    def object(self, name):
        """Return the ``obj`` of the extension with the given name, or None
        if there is no such extension."""
        try:
            return self[name].obj
        except KeyError:
            return None

    def register_module(self, name, module_name):
        """Load the transformer found in the given module and register it.

        :param name: Name of the entry point to create.
        :param module_name: Module name given to the entry point.
        """
        ep = EntryPoint(name, module_name)
        # verify_requirements is a required argument of _load_one_plugin;
        # the entry point built here has no distribution to verify against
        loaded = self._load_one_plugin(ep, invoke_on_load=True, invoke_args=(),
                                       invoke_kwds={}, verify_requirements=False)
        if loaded:
            self.extensions.append(loaded)
            self.extensions = self.order_extensions(self.extensions)
            # reset the by-name lookup so that it gets rebuilt
            self._extensions_by_name = None
|
||||
|
||||
|
||||
class DefaultTransformerExtensionManager(CustomTransformerExtensionManager):
    """Extension manager that also knows the transformers shipped with guessit."""

    @property
    def _internal_entry_points(self):
        """Entry point strings of the built-in transformers.

        Every string has the form
        '<name> = guessit.transfo.<name>:<ClassName>'.
        """
        builtin = (
            ('split_path_components', 'SplitPathComponents'),
            ('guess_filetype', 'GuessFiletype'),
            ('split_explicit_groups', 'SplitExplicitGroups'),
            ('guess_date', 'GuessDate'),
            ('guess_website', 'GuessWebsite'),
            ('guess_release_group', 'GuessReleaseGroup'),
            ('guess_properties', 'GuessProperties'),
            ('guess_language', 'GuessLanguage'),
            ('guess_video_rexps', 'GuessVideoRexps'),
            ('guess_episodes_rexps', 'GuessEpisodesRexps'),
            ('guess_weak_episodes_rexps', 'GuessWeakEpisodesRexps'),
            ('guess_bonus_features', 'GuessBonusFeatures'),
            ('guess_year', 'GuessYear'),
            ('guess_country', 'GuessCountry'),
            ('guess_idnumber', 'GuessIdnumber'),
            ('split_on_dash', 'SplitOnDash'),
            ('guess_episode_info_from_position', 'GuessEpisodeInfoFromPosition'),
            ('guess_movie_title_from_position', 'GuessMovieTitleFromPosition'),
            ('guess_episode_special', 'GuessEpisodeSpecial'),
        )
        return ['%s = guessit.transfo.%s:%s' % (module, module, class_name)
                for module, class_name in builtin]

    def _find_entry_points(self, namespace):
        """Return the internal entry points (for this manager's namespace
        only) plus the ones found by the parent class.

        Entry points are keyed by name, so one found by the parent class
        replaces an internal one with the same name.
        """
        found = {}

        # Internal entry points
        if namespace == self.namespace:
            for spec in self._internal_entry_points:
                internal = EntryPoint.parse(spec)
                found[internal.name] = internal

        # Package entry points
        parent = super(DefaultTransformerExtensionManager, self)
        for packaged in parent._find_entry_points(namespace):
            found[packaged.name] = packaged

        return list(found.values())
|
||||
|
||||
# extension manager currently in use; set by reload()
_extensions = None


def all_transformers():
    """Return the objects of the current extension manager."""
    manager = _extensions
    return manager.objects()
|
||||
|
||||
|
||||
def get_transformer(name):
    """Return what the current extension manager's object() gives for name."""
    manager = _extensions
    return manager.object(name)
|
||||
|
||||
|
||||
def add_transformer(name, module_name):
    """Register the given module in the current extension manager."""
    manager = _extensions
    manager.register_module(name, module_name)
|
||||
|
||||
|
||||
def reload(custom=False):
    """
    Replace the extension manager with a new default or custom one.

    :param custom: if True, custom manager will be used, else default one.
    Default manager will load default extensions from guessit and setuptools packaging extensions
    Custom manager will not load default extensions from guessit, using only setuptools packaging extensions.
    :type custom: boolean
    """
    global _extensions
    if custom:
        manager_class = CustomTransformerExtensionManager
    else:
        manager_class = DefaultTransformerExtensionManager
    _extensions = manager_class()


# create the default manager when the module is imported
reload()
|
65
lib/guessit/quality.py
Normal file
65
lib/guessit/quality.py
Normal file
|
@ -0,0 +1,65 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit.plugins.transformers import all_transformers
|
||||
|
||||
|
||||
def best_quality_properties(props, *guesses):
    """Retrieve the best quality guess, based on given properties

    Every transformer rates every guess; the guess that received the
    highest single rate is kept (the first one in case of a tie).

    :param props: Properties to include in the rating
    :type props: list of strings
    :param guesses: Guesses to rate
    :type guesses: :class:`guessit.guess.Guess`

    :return: Best quality guess from all passed guesses, None if nothing
        was rated
    :rtype: :class:`guessit.guess.Guess`
    """
    rated = ((candidate, transformer.rate_quality(candidate, *props))
             for candidate in guesses
             for transformer in all_transformers())

    winner = None
    top_rate = None
    for candidate, rate in rated:
        if top_rate is None or top_rate < rate:
            top_rate = rate
            winner = candidate
    return winner
|
||||
|
||||
|
||||
def best_quality(*guesses):
    """Retrieve the best quality guess.

    Every transformer rates every guess; the guess that received the
    highest single rate is kept (the first one in case of a tie).

    :param guesses: Guesses to rate
    :type guesses: :class:`guessit.guess.Guess`

    :return: Best quality guess from all passed guesses, None if nothing
        was rated
    :rtype: :class:`guessit.guess.Guess`
    """
    rated = ((candidate, transformer.rate_quality(candidate))
             for candidate in guesses
             for transformer in all_transformers())

    winner = None
    top_rate = None
    for candidate, rate in rated:
        if top_rate is None or top_rate < rate:
            top_rate = rate
            winner = candidate
    return winner
|
89
lib/guessit/slogging.py
Normal file
89
lib/guessit/slogging.py
Normal file
|
@ -0,0 +1,89 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
import logging
|
||||
import sys
|
||||
import os
|
||||
|
||||
# ANSI escape sequences: foreground colours, then the attribute reset
RED_FONT = "\x1B[0;31m"
GREEN_FONT = "\x1B[0;32m"
YELLOW_FONT = "\x1B[0;33m"
BLUE_FONT = "\x1B[0;34m"

RESET_FONT = "\x1B[0m"
|
||||
|
||||
|
||||
def setupLogging(colored=True, with_time=False, with_thread=False, filename=None, with_lineno=False):  # pragma: no cover
    """Set up a nice colored logger as the main application logger.

    A handler is added to the root logger: a file handler when filename is
    given, a stream handler otherwise.

    :param colored: Colour the stream output (ignored on win32 and when
        logging to a file).
    :param with_time: Prefix each record with its time.
    :param with_thread: Include the thread name in each record.
    :param filename: Log to this file, which is overwritten. Its directory
        is created if it does not exist.
    :param with_lineno: Include the line number in each record.
    """

    class SimpleFormatter(logging.Formatter):
        def __init__(self, with_time, with_thread):
            self.fmt = (('%(asctime)s ' if with_time else '') +
                        '%(levelname)-8s ' +
                        '[%(name)s:%(funcName)s' +
                        (':%(lineno)s' if with_lineno else '') + ']' +
                        ('[%(threadName)s]' if with_thread else '') +
                        ' -- %(message)s')
            logging.Formatter.__init__(self, self.fmt)

    class ColoredFormatter(logging.Formatter):
        def __init__(self, with_time, with_thread):
            # '-CC-' is a placeholder that format() replaces with the
            # colour matching the level of the record
            self.fmt = (('%(asctime)s ' if with_time else '') +
                        '-CC-%(levelname)-8s ' +
                        BLUE_FONT + '[%(name)s:%(funcName)s' +
                        (':%(lineno)s' if with_lineno else '') + ']' +
                        RESET_FONT + ('[%(threadName)s]' if with_thread else '') +
                        ' -- %(message)s')

            logging.Formatter.__init__(self, self.fmt)

        def format(self, record):
            modpath = record.name.split('.')
            record.mname = modpath[0]
            record.mmodule = '.'.join(modpath[1:])
            result = logging.Formatter.format(self, record)
            if record.levelno == logging.DEBUG:
                color = BLUE_FONT
            elif record.levelno == logging.INFO:
                color = GREEN_FONT
            elif record.levelno == logging.WARNING:
                color = YELLOW_FONT
            else:
                color = RED_FONT

            result = result.replace('-CC-', color)
            return result

    if filename is not None:
        # make sure we can write to our log file
        logdir = os.path.dirname(filename)
        # a bare file name has an empty dirname: there is nothing to
        # create then, and os.makedirs('') would raise
        if logdir and not os.path.exists(logdir):
            os.makedirs(logdir)
        ch = logging.FileHandler(filename, mode='w')
        ch.setFormatter(SimpleFormatter(with_time, with_thread))
    else:
        ch = logging.StreamHandler()
        if colored and sys.platform != 'win32':
            ch.setFormatter(ColoredFormatter(with_time, with_thread))
        else:
            ch.setFormatter(SimpleFormatter(with_time, with_thread))

    logging.getLogger().addHandler(ch)
|
352
lib/guessit/textutils.py
Normal file
352
lib/guessit/textutils.py
Normal file
|
@ -0,0 +1,352 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit import s
|
||||
from guessit.patterns import sep
|
||||
import functools
|
||||
import unicodedata
|
||||
import re
|
||||
|
||||
# string-related functions
|
||||
|
||||
|
||||
def normalize_unicode(s):
    """Return s in Unicode normalization form C (NFC)."""
    form = 'NFC'
    return unicodedata.normalize(form, s)
|
||||
|
||||
|
||||
def strip_brackets(s):
    """Remove one pair of matching enclosing brackets from ``s``.

    The first and last characters are dropped only when they form one of the
    pairs ``[]``, ``()`` or ``{}``. Falsy input (empty string, ``None``) and
    strings that are not enclosed by a matching pair are returned unchanged.
    """
    if not s:
        return s

    first, last = s[0], s[-1]
    for opening, closing in ('[]', '()', '{}'):
        if first == opening and last == closing:
            return s[1:-1]

    return s
|
||||
|
||||
|
||||
# Matches a dotted acronym (e.g. "S.H.I.E.L.D."): at the start of the string or
# after a non-word character, two or more "<letter>." pairs followed by a final
# letter and an optional trailing dot. Group 1 is the acronym itself.
_dotted_rexp = re.compile(r'(?:\W|^)(([A-Za-z]\.){2,}[A-Za-z]\.?)')


def clean_string(st):
    """Return ``st`` with separator characters replaced by single spaces.

    Every element of ``sep`` (imported from ``guessit.patterns``) is replaced
    by a space, except:

    * ``'-'`` and ``','`` which are always kept;
    * ``'.'`` inside the first dotted acronym found by ``_dotted_rexp``
      (only the first match is protected; dots everywhere else are replaced).

    Runs of whitespace are then collapsed to one space, and any dashes at the
    very beginning or end of the result are removed.
    """
    for c in sep:
        # do not remove certain chars
        if c in ('-', ','):
            continue

        if c == '.':
            # we should not remove the dots for acronyms and such
            dotted = _dotted_rexp.search(st)
            if dotted:
                exclude_begin, exclude_end = dotted.span(1)

                st = (st[:exclude_begin].replace(c, ' ') +
                      st[exclude_begin:exclude_end] +
                      st[exclude_end:].replace(c, ' '))
                continue

        st = st.replace(c, ' ')

    # split() without arguments never yields empty strings, so joining its
    # result is enough to collapse whitespace
    result = ' '.join(st.split())

    # now also remove dashes on the outer part of the string
    return result.strip('-')
|
||||
|
||||
|
||||
# Raw string: '\w' in a plain literal is an invalid escape sequence, which
# newer Python versions warn about.
_words_rexp = re.compile(r'\w+', re.UNICODE)


def find_words(s):
    """Return the list of words (runs of word characters) found in ``s``.

    Underscores are turned into spaces first because ``\\w`` would otherwise
    treat them as part of a word.
    """
    return _words_rexp.findall(s.replace('_', ' '))
|
||||
|
||||
|
||||
def reorder_title(title, articles=('the',), separators=(',', ', ')):
    """Move a trailing article back to the front of ``title``.

    If the lowercased title ends with ``separator + article`` for one of the
    given combinations (articles are tried in order, each with every
    separator), the article is moved to the beginning, followed by a space and
    the rest of the title without the separator. The original casing of the
    title is kept. Titles without such a suffix are returned unchanged.
    """
    lowered = title.lower()
    endings = [(joiner, joiner + article)
               for article in articles
               for joiner in separators]

    for joiner, ending in endings:
        n = len(ending)
        if lowered[-n:] == ending:
            article_text = title[len(joiner) - n:]
            remainder = title[:-n]
            return article_text + ' ' + remainder

    return title
|
||||
|
||||
|
||||
def str_replace(string, pos, c):
    """Return ``string`` with the character at index ``pos`` replaced by ``c``."""
    head = string[:pos]
    tail = string[pos + 1:]
    return head + c + tail
|
||||
|
||||
|
||||
def str_fill(string, region, c):
    """Return ``string`` with the slice ``region = (start, end)`` overwritten.

    The slice ``string[start:end]`` is replaced by ``c`` repeated
    ``end - start`` times.
    """
    start, end = region
    filler = c * (end - start)
    return string[:start] + filler + string[end:]
|
||||
|
||||
|
||||
def levenshtein(a, b):
    """Return the Levenshtein (edit) distance between sequences ``a`` and ``b``.

    Insertions, deletions and substitutions each cost 1. If one argument is
    falsy, the length of the other one is returned.
    """
    if not a:
        return len(b)
    if not b:
        return len(a)

    rows = len(a) + 1
    cols = len(b) + 1

    # dist[i][j] is the distance between a[:i] and b[:j]
    dist = [[0] * cols for _ in range(rows)]
    for i in range(rows):
        dist[i][0] = i
    for j in range(cols):
        dist[0][j] = j

    for i in range(1, rows):
        for j in range(1, cols):
            cost = 0 if a[i - 1] == b[j - 1] else 1
            deletion = dist[i - 1][j] + 1
            insertion = dist[i][j - 1] + 1
            substitution = dist[i - 1][j - 1] + cost
            dist[i][j] = min(deletion, insertion, substitution)

    return dist[rows - 1][cols - 1]
|
||||
|
||||
|
||||
# group-related functions
|
||||
|
||||
def find_first_level_groups_span(string, enclosing):
    """Return a list of pairs (start, end) for the groups delimited by the given
    enclosing characters.
    This does not return nested groups, ie: '(ab(c)(d))' will return a single group
    containing the whole string.

    >>> find_first_level_groups_span('abcd', '()')
    []

    >>> find_first_level_groups_span('abc(de)fgh', '()')
    [(3, 7)]

    >>> find_first_level_groups_span('(ab(c)(d))', '()')
    [(0, 10)]

    >>> find_first_level_groups_span('ab[c]de[f]gh(i)', '[]')
    [(2, 5), (7, 10)]
    """
    opening, closing = enclosing
    open_positions = []  # stack of indices of the currently open groups
    spans = []

    for pos, char in enumerate(string):
        if char == opening:
            open_positions.append(pos)
        elif char == closing and open_positions:
            # a closing char without a matching opening one is simply ignored
            start = open_positions.pop()
            if not open_positions:
                # the stack is empty again: this closes a first-level group
                spans.append((start, pos + 1))

    return spans
|
||||
|
||||
|
||||
def split_on_groups(string, groups):
    """Split the given string using the different known groups for boundaries.
    >>> s(split_on_groups('0123456789', [ (2, 4) ]))
    ['01', '23', '456789']

    >>> s(split_on_groups('0123456789', [ (2, 4), (4, 6) ]))
    ['01', '23', '45', '6789']

    >>> s(split_on_groups('0123456789', [ (5, 7), (2, 4) ]))
    ['01', '23', '4', '56', '789']

    ``groups`` is an iterable of (start, end) index pairs; every distinct
    start/end value becomes a cut point. Empty pieces are dropped. When
    ``groups`` is empty the whole string is returned as the single piece.
    """
    if not groups:
        return [string]

    # collect every distinct boundary in one pass (no repeated list copies)
    boundaries = sorted(set(bound for group in groups for bound in group))
    if boundaries[0] != 0:
        boundaries.insert(0, 0)
    if boundaries[-1] != len(string):
        boundaries.append(len(string))

    pieces = [string[start:end]
              for start, end in zip(boundaries[:-1], boundaries[1:])]

    return [piece for piece in pieces if piece]  # return only non-empty groups
|
||||
|
||||
|
||||
def find_first_level_groups(string, enclosing, blank_sep=None):
    """Return a list of groups that could be split because of explicit grouping.
    The groups are delimited by the given enclosing characters.

    You can also specify if you want to blank the separator chars in the returned
    list of groups by specifying a character for it. None means it won't be replaced.

    This does not return nested groups, ie: '(ab(c)(d))' will return a single group
    containing the whole string.

    >>> s(find_first_level_groups('', '()'))
    ['']

    >>> s(find_first_level_groups('abcd', '()'))
    ['abcd']

    >>> s(find_first_level_groups('abc(de)fgh', '()'))
    ['abc', '(de)', 'fgh']

    >>> s(find_first_level_groups('(ab(c)(d))', '()', blank_sep = '_'))
    ['_ab(c)(d)_']

    >>> s(find_first_level_groups('ab[c]de[f]gh(i)', '[]'))
    ['ab', '[c]', 'de', '[f]', 'gh(i)']

    >>> s(find_first_level_groups('()[]()', '()', blank_sep = '-'))
    ['--', '[]', '--']

    """
    spans = find_first_level_groups_span(string, enclosing)

    if blank_sep:
        # overwrite the opening and closing char of every first-level group
        for start, end in spans:
            for pos in (start, end - 1):
                string = str_replace(string, pos, blank_sep)

    return split_on_groups(string, spans)
|
||||
|
||||
|
||||
_camel_word2_set = set(('is', 'to',))
|
||||
_camel_word3_set = set(('the',))
|
||||
|
||||
|
||||
def _camel_split_and_lower(string, i):
|
||||
"""Retrieves a tuple (need_split, need_lower)
|
||||
|
||||
need_split is True if this char is a first letter in a camelCasedString.
|
||||
need_lower is True if this char should be lowercased.
|
||||
"""
|
||||
|
||||
def islower(c):
|
||||
return c.isalpha() and not c.isupper()
|
||||
|
||||
previous_char2 = string[i - 2] if i > 1 else None
|
||||
previous_char = string[i - 1] if i > 0 else None
|
||||
char = string[i]
|
||||
next_char = string[i + 1] if i + 1 < len(string) else None
|
||||
next_char2 = string[i + 2] if i + 2 < len(string) else None
|
||||
|
||||
char_upper = char.isupper()
|
||||
char_lower = islower(char)
|
||||
|
||||
# previous_char2_lower = islower(previous_char2) if previous_char2 else False
|
||||
previous_char2_upper = previous_char2.isupper() if previous_char2 else False
|
||||
|
||||
previous_char_lower = islower(previous_char) if previous_char else False
|
||||
previous_char_upper = previous_char.isupper() if previous_char else False
|
||||
|
||||
next_char_upper = next_char.isupper() if next_char else False
|
||||
next_char_lower = islower(next_char) if next_char else False
|
||||
|
||||
next_char2_upper = next_char2.isupper() if next_char2 else False
|
||||
# next_char2_lower = islower(next_char2) if next_char2 else False
|
||||
|
||||
mixedcase_word = (previous_char_upper and char_lower and next_char_upper) or \
|
||||
(previous_char_lower and char_upper and next_char_lower and next_char2_upper) or \
|
||||
(previous_char2_upper and previous_char_lower and char_upper)
|
||||
if mixedcase_word:
|
||||
word2 = (char + next_char).lower() if next_char else None
|
||||
word3 = (char + next_char + next_char2).lower() if next_char and next_char2 else None
|
||||
word2b = (previous_char2 + previous_char).lower() if previous_char2 and previous_char else None
|
||||
if word2 in _camel_word2_set or word2b in _camel_word2_set or word3 in _camel_word3_set:
|
||||
mixedcase_word = False
|
||||
|
||||
uppercase_word = previous_char_upper and char_upper and next_char_upper or (char_upper and next_char_upper and next_char2_upper)
|
||||
|
||||
need_split = char_upper and previous_char_lower and not mixedcase_word
|
||||
|
||||
if not need_split:
|
||||
previous_char_upper = string[i - 1].isupper() if i > 0 else False
|
||||
next_char_lower = (string[i + 1].isalpha() and not string[i + 1].isupper()) if i + 1 < len(string) else False
|
||||
need_split = char_upper and previous_char_upper and next_char_lower
|
||||
uppercase_word = previous_char_upper and not next_char_lower
|
||||
|
||||
need_lower = not uppercase_word and not mixedcase_word and need_split
|
||||
|
||||
return (need_split, need_lower)
|
||||
|
||||
|
||||
def is_camel(string):
    """Return True if at least one position of ``string`` is a camelCase
    word boundary according to ``_camel_split_and_lower``.

    >>> is_camel('dogEATDog')
    True
    >>> is_camel('DeathToCamelCase')
    True
    >>> is_camel('death_to_camel_case')
    False
    >>> is_camel('TheBest')
    True
    >>> is_camel('The Best')
    False
    """
    return any(_camel_split_and_lower(string, pos)[0]
               for pos in range(len(string)))
|
||||
|
||||
|
||||
def from_camel(string):
    """Split a camelCased string into space-separated words.

    A space is inserted before every character flagged as a word start by
    ``_camel_split_and_lower``; that character is lowercased when the helper
    says so. Falsy input is returned unchanged.

    >>> from_camel('dogEATDog') == 'dog EAT dog'
    True
    >>> from_camel('DeathToCamelCase') == 'Death to camel case'
    True
    >>> from_camel('TheBest') == 'The best'
    True
    >>> from_camel('MiXedCaSe is not camelCase') == 'MiXedCaSe is not camel case'
    True
    """
    if not string:
        return string

    out = []
    for pos, char in enumerate(string):
        need_split, need_lower = _camel_split_and_lower(string, pos)
        if need_split:
            out.append(' ')
        out.append(char.lower() if need_lower else char)

    return ''.join(out)
|
341
lib/guessit/tlds-alpha-by-domain.txt
Normal file
341
lib/guessit/tlds-alpha-by-domain.txt
Normal file
|
@ -0,0 +1,341 @@
|
|||
# Version 2013112900, Last Updated Fri Nov 29 07:07:01 2013 UTC
|
||||
AC
|
||||
AD
|
||||
AE
|
||||
AERO
|
||||
AF
|
||||
AG
|
||||
AI
|
||||
AL
|
||||
AM
|
||||
AN
|
||||
AO
|
||||
AQ
|
||||
AR
|
||||
ARPA
|
||||
AS
|
||||
ASIA
|
||||
AT
|
||||
AU
|
||||
AW
|
||||
AX
|
||||
AZ
|
||||
BA
|
||||
BB
|
||||
BD
|
||||
BE
|
||||
BF
|
||||
BG
|
||||
BH
|
||||
BI
|
||||
BIKE
|
||||
BIZ
|
||||
BJ
|
||||
BM
|
||||
BN
|
||||
BO
|
||||
BR
|
||||
BS
|
||||
BT
|
||||
BV
|
||||
BW
|
||||
BY
|
||||
BZ
|
||||
CA
|
||||
CAMERA
|
||||
CAT
|
||||
CC
|
||||
CD
|
||||
CF
|
||||
CG
|
||||
CH
|
||||
CI
|
||||
CK
|
||||
CL
|
||||
CLOTHING
|
||||
CM
|
||||
CN
|
||||
CO
|
||||
COM
|
||||
CONSTRUCTION
|
||||
CONTRACTORS
|
||||
COOP
|
||||
CR
|
||||
CU
|
||||
CV
|
||||
CW
|
||||
CX
|
||||
CY
|
||||
CZ
|
||||
DE
|
||||
DIAMONDS
|
||||
DIRECTORY
|
||||
DJ
|
||||
DK
|
||||
DM
|
||||
DO
|
||||
DZ
|
||||
EC
|
||||
EDU
|
||||
EE
|
||||
EG
|
||||
ENTERPRISES
|
||||
EQUIPMENT
|
||||
ER
|
||||
ES
|
||||
ESTATE
|
||||
ET
|
||||
EU
|
||||
FI
|
||||
FJ
|
||||
FK
|
||||
FM
|
||||
FO
|
||||
FR
|
||||
GA
|
||||
GALLERY
|
||||
GB
|
||||
GD
|
||||
GE
|
||||
GF
|
||||
GG
|
||||
GH
|
||||
GI
|
||||
GL
|
||||
GM
|
||||
GN
|
||||
GOV
|
||||
GP
|
||||
GQ
|
||||
GR
|
||||
GRAPHICS
|
||||
GS
|
||||
GT
|
||||
GU
|
||||
GURU
|
||||
GW
|
||||
GY
|
||||
HK
|
||||
HM
|
||||
HN
|
||||
HOLDINGS
|
||||
HR
|
||||
HT
|
||||
HU
|
||||
ID
|
||||
IE
|
||||
IL
|
||||
IM
|
||||
IN
|
||||
INFO
|
||||
INT
|
||||
IO
|
||||
IQ
|
||||
IR
|
||||
IS
|
||||
IT
|
||||
JE
|
||||
JM
|
||||
JO
|
||||
JOBS
|
||||
JP
|
||||
KE
|
||||
KG
|
||||
KH
|
||||
KI
|
||||
KITCHEN
|
||||
KM
|
||||
KN
|
||||
KP
|
||||
KR
|
||||
KW
|
||||
KY
|
||||
KZ
|
||||
LA
|
||||
LAND
|
||||
LB
|
||||
LC
|
||||
LI
|
||||
LIGHTING
|
||||
LK
|
||||
LR
|
||||
LS
|
||||
LT
|
||||
LU
|
||||
LV
|
||||
LY
|
||||
MA
|
||||
MC
|
||||
MD
|
||||
ME
|
||||
MG
|
||||
MH
|
||||
MIL
|
||||
MK
|
||||
ML
|
||||
MM
|
||||
MN
|
||||
MO
|
||||
MOBI
|
||||
MP
|
||||
MQ
|
||||
MR
|
||||
MS
|
||||
MT
|
||||
MU
|
||||
MUSEUM
|
||||
MV
|
||||
MW
|
||||
MX
|
||||
MY
|
||||
MZ
|
||||
NA
|
||||
NAME
|
||||
NC
|
||||
NE
|
||||
NET
|
||||
NF
|
||||
NG
|
||||
NI
|
||||
NL
|
||||
NO
|
||||
NP
|
||||
NR
|
||||
NU
|
||||
NZ
|
||||
OM
|
||||
ORG
|
||||
PA
|
||||
PE
|
||||
PF
|
||||
PG
|
||||
PH
|
||||
PHOTOGRAPHY
|
||||
PK
|
||||
PL
|
||||
PLUMBING
|
||||
PM
|
||||
PN
|
||||
POST
|
||||
PR
|
||||
PRO
|
||||
PS
|
||||
PT
|
||||
PW
|
||||
PY
|
||||
QA
|
||||
RE
|
||||
RO
|
||||
RS
|
||||
RU
|
||||
RW
|
||||
SA
|
||||
SB
|
||||
SC
|
||||
SD
|
||||
SE
|
||||
SEXY
|
||||
SG
|
||||
SH
|
||||
SI
|
||||
SINGLES
|
||||
SJ
|
||||
SK
|
||||
SL
|
||||
SM
|
||||
SN
|
||||
SO
|
||||
SR
|
||||
ST
|
||||
SU
|
||||
SV
|
||||
SX
|
||||
SY
|
||||
SZ
|
||||
TATTOO
|
||||
TC
|
||||
TD
|
||||
TECHNOLOGY
|
||||
TEL
|
||||
TF
|
||||
TG
|
||||
TH
|
||||
TIPS
|
||||
TJ
|
||||
TK
|
||||
TL
|
||||
TM
|
||||
TN
|
||||
TO
|
||||
TODAY
|
||||
TP
|
||||
TR
|
||||
TRAVEL
|
||||
TT
|
||||
TV
|
||||
TW
|
||||
TZ
|
||||
UA
|
||||
UG
|
||||
UK
|
||||
US
|
||||
UY
|
||||
UZ
|
||||
VA
|
||||
VC
|
||||
VE
|
||||
VENTURES
|
||||
VG
|
||||
VI
|
||||
VN
|
||||
VOYAGE
|
||||
VU
|
||||
WF
|
||||
WS
|
||||
XN--3E0B707E
|
||||
XN--45BRJ9C
|
||||
XN--80AO21A
|
||||
XN--80ASEHDB
|
||||
XN--80ASWG
|
||||
XN--90A3AC
|
||||
XN--CLCHC0EA0B2G2A9GCD
|
||||
XN--FIQS8S
|
||||
XN--FIQZ9S
|
||||
XN--FPCRJ9C3D
|
||||
XN--FZC2C9E2C
|
||||
XN--GECRJ9C
|
||||
XN--H2BRJ9C
|
||||
XN--J1AMH
|
||||
XN--J6W193G
|
||||
XN--KPRW13D
|
||||
XN--KPRY57D
|
||||
XN--L1ACC
|
||||
XN--LGBBAT1AD8J
|
||||
XN--MGB9AWBF
|
||||
XN--MGBA3A4F16A
|
||||
XN--MGBAAM7A8H
|
||||
XN--MGBAYH7GPA
|
||||
XN--MGBBH1A71E
|
||||
XN--MGBC0A9AZCG
|
||||
XN--MGBERP4A5D4AR
|
||||
XN--MGBX4CD0AB
|
||||
XN--NGBC5AZD
|
||||
XN--O3CW4H
|
||||
XN--OGBPF8FL
|
||||
XN--P1AI
|
||||
XN--PGBS0DH
|
||||
XN--Q9JYB4C
|
||||
XN--S9BRJ9C
|
||||
XN--UNUP4Y
|
||||
XN--WGBH1C
|
||||
XN--WGBL6A
|
||||
XN--XKC2AL3HYE2A
|
||||
XN--XKC2DL3A5EE0H
|
||||
XN--YFRO4I67O
|
||||
XN--YGBI2AMMX
|
||||
XXX
|
||||
YE
|
||||
YT
|
||||
ZA
|
||||
ZM
|
||||
ZW
|
30
lib/guessit/transfo/__init__.py
Normal file
30
lib/guessit/transfo/__init__.py
Normal file
|
@ -0,0 +1,30 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
|
||||
class TransformerException(Exception):
    """Exception carrying a message and the transformer it relates to."""

    def __init__(self, transformer, message):
        # let the base class store the message, then remember the transformer
        super(TransformerException, self).__init__(message)
        self.transformer = transformer
|
67
lib/guessit/transfo/guess_bonus_features.py
Normal file
67
lib/guessit/transfo/guess_bonus_features.py
Normal file
|
@ -0,0 +1,67 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit.plugins.transformers import Transformer
|
||||
from guessit.matcher import found_property
|
||||
|
||||
|
||||
class GuessBonusFeatures(Transformer):
    """Label the unidentified leaves next to bonus, film and season numbers.

    Based on leaves already carrying ``bonusNumber``, ``filmNumber`` or
    ``season`` guesses, neighbouring unidentified leaves are tagged as
    ``bonusTitle``, ``filmSeries``/``title`` or ``series``.
    """

    def __init__(self):
        # NOTE(review): -150 is presumably this transformer's priority --
        # confirm against Transformer.__init__
        Transformer.__init__(self, -150)

    def supported_properties(self):
        """Return the names of the properties this transformer may set."""
        return ['bonusNumber', 'bonusTitle', 'filmNumber', 'filmSeries', 'title', 'series']

    def process(self, mtree, options=None):
        """Tag neighbours of bonus/film/season numbers found in ``mtree``.

        ``options`` is accepted but not used here.
        """
        def previous_group(g):
            # first leaf, scanning the unidentified leaves backwards, whose
            # index sorts before g's; None when there is none
            for leaf in mtree.unidentified_leaves()[::-1]:
                if leaf.node_idx < g.node_idx:
                    return leaf
            return None

        def next_group(g):
            # first unidentified leaf whose index sorts after g's; None when
            # there is none
            for leaf in mtree.unidentified_leaves():
                if leaf.node_idx > g.node_idx:
                    return leaf
            return None

        def same_group(g1, g2):
            # both nodes share the first two components of their index
            return g1.node_idx[:2] == g2.node_idx[:2]

        bonus = [node for node in mtree.leaves() if 'bonusNumber' in node.guess]
        if bonus:
            bonusTitle = next_group(bonus[0])
            if bonusTitle and same_group(bonusTitle, bonus[0]):
                found_property(bonusTitle, 'bonusTitle', confidence=0.8)

        filmNumber = [node for node in mtree.leaves()
                      if 'filmNumber' in node.guess]
        if filmNumber:
            # previous_group/next_group return None when the film number has
            # no unidentified neighbour on that side; skip instead of handing
            # None to found_property (same guard as the bonus branch above)
            filmSeries = previous_group(filmNumber[0])
            if filmSeries is not None:
                found_property(filmSeries, 'filmSeries', confidence=0.9)

            title = next_group(filmNumber[0])
            if title is not None:
                found_property(title, 'title', confidence=0.9)

        season = [node for node in mtree.leaves() if 'season' in node.guess]
        if season and 'bonusNumber' in mtree.info:
            series = previous_group(season[0])
            # same_group dereferences .node_idx, so None must be filtered first
            if series is not None and same_group(series, season[0]):
                found_property(series, 'series', confidence=0.9)
|
69
lib/guessit/transfo/guess_country.py
Normal file
69
lib/guessit/transfo/guess_country.py
Normal file
|
@ -0,0 +1,69 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit.plugins.transformers import Transformer
|
||||
from guessit.country import Country
|
||||
from guessit import Guess
|
||||
|
||||
|
||||
class GuessCountry(Transformer):
    """Detect a country code written as an explicit group, e.g. ``(US)``."""

    def __init__(self):
        Transformer.__init__(self, -170)
        # list of common words which could be interpreted as countries, but which
        # are far too common to be able to say they represent a country
        self.country_common_words = frozenset(['bt', 'bb'])

    def supported_properties(self):
        """Return the names of the properties this transformer may set."""
        return ['country']

    def should_process(self, mtree, options=None):
        """Return False when the ``nocountry`` option is present."""
        return 'nocountry' not in (options or {})

    def process(self, mtree, options=None):
        """Attach a ``country`` guess to matching unidentified leaves.

        Only leaves whose ``node_idx`` has exactly two components are
        considered. The text between the leaf's first and last character is
        lowercased and handed to ``Country``; a ``ValueError`` from it means
        the leaf is left untouched.
        """
        for leaf in mtree.unidentified_leaves():
            if len(leaf.node_idx) != 2:
                continue

            code = leaf.value[1:-1].lower()
            if code in self.country_common_words:
                continue

            # only keep explicit groups (enclosed in parentheses/brackets)
            if not leaf.is_explicit():
                continue

            try:
                country = Country(code, strict=True)
            except ValueError:
                continue

            leaf.guess = Guess(country=country, confidence=1.0, input=leaf.value, span=leaf.span)

    def post_process(self, mtree, options=None, *args, **kwargs):
        """Append the first detected country to every ``series`` guess."""
        # if country is in the guessed properties, make it part of the series name
        series_leaves = mtree.leaves_containing('series')
        country_leaves = mtree.leaves_containing('country')

        if not (series_leaves and country_leaves):
            return

        first_country = country_leaves[0]
        for series_leaf in series_leaves:
            series_leaf.guess['series'] += ' (%s)' % first_country.guess['country'].alpha2.upper()
|
43
lib/guessit/transfo/guess_date.py
Normal file
43
lib/guessit/transfo/guess_date.py
Normal file
|
@ -0,0 +1,43 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit.plugins.transformers import Transformer
|
||||
from guessit.matcher import GuessFinder
|
||||
from guessit.date import search_date
|
||||
|
||||
|
||||
class GuessDate(Transformer):
    """Look for a date in the unidentified leaves of the match tree."""

    def __init__(self):
        Transformer.__init__(self, 50)

    def supported_properties(self):
        """Return the names of the properties this transformer may set."""
        return ['date']

    def guess_date(self, string, node=None, options=None):
        """Search ``string`` for a date with ``search_date``.

        Returns ``({'date': date}, span)`` when a date is found and
        ``(None, None)`` otherwise. ``node`` and ``options`` are unused.
        """
        date, span = search_date(string)
        if not date:
            return None, None
        return {'date': date}, span

    def process(self, mtree, options=None):
        """Run ``guess_date`` over every unidentified leaf of ``mtree``."""
        finder = GuessFinder(self.guess_date, 1.0, self.log, options)
        finder.process_nodes(mtree.unidentified_leaves())
|
162
lib/guessit/transfo/guess_episode_info_from_position.py
Normal file
162
lib/guessit/transfo/guess_episode_info_from_position.py
Normal file
|
@ -0,0 +1,162 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit.plugins.transformers import Transformer, get_transformer
|
||||
from guessit.textutils import reorder_title
|
||||
|
||||
from guessit.matcher import found_property
|
||||
|
||||
|
||||
class GuessEpisodeInfoFromPosition(Transformer):
    """Guess ``series`` and episode ``title`` from where the remaining
    unidentified leaves sit relative to already identified ones."""

    def __init__(self):
        Transformer.__init__(self, -200)

    def supported_properties(self):
        """Return the names of the properties this transformer may set."""
        return ['title', 'series']

    def match_from_epnum_position(self, mtree, node):
        """Tag series/title leaves around ``node``, the leaf holding the
        episode number.

        The candidate lists are recomputed before each step because they are
        built from the leaves that are still unidentified at that point.
        """
        epnum_idx = node.node_idx

        # a few helper functions to be able to filter using high-level semantics
        def before_epnum_in_same_pathgroup():
            found = []
            for leaf in mtree.unidentified_leaves():
                if (leaf.node_idx[0] == epnum_idx[0] and
                        leaf.node_idx[1:] < epnum_idx[1:]):
                    found.append(leaf)
            return found

        def after_epnum_in_same_pathgroup():
            found = []
            for leaf in mtree.unidentified_leaves():
                if (leaf.node_idx[0] == epnum_idx[0] and
                        leaf.node_idx[1:] > epnum_idx[1:]):
                    found.append(leaf)
            return found

        def after_epnum_in_same_explicitgroup():
            found = []
            for leaf in mtree.unidentified_leaves():
                if (leaf.node_idx[:2] == epnum_idx[:2] and
                        leaf.node_idx[2:] > epnum_idx[2:]):
                    found.append(leaf)
            return found

        # epnumber is the first group and there are only 2 after it in same
        # path group
        # -> series title - episode title
        title_candidates = self._filter_candidates(after_epnum_in_same_pathgroup())

        if ('title' not in mtree.info and                    # no title
                not before_epnum_in_same_pathgroup() and     # no groups before
                len(title_candidates) == 2):                 # only 2 groups after
            series_leaf, title_leaf = title_candidates
            found_property(series_leaf, 'series', confidence=0.4)
            found_property(title_leaf, 'title', confidence=0.4)
            return

        # if we have at least 1 valid group before the episodeNumber, then it's
        # probably the series name
        series_candidates = before_epnum_in_same_pathgroup()
        if series_candidates:
            found_property(series_candidates[0], 'series', confidence=0.7)

        # only 1 group after (in the same path group) and it's probably the
        # episode title
        title_candidates = self._filter_candidates(after_epnum_in_same_pathgroup())
        if len(title_candidates) == 1:
            found_property(title_candidates[0], 'title', confidence=0.5)
            return

        # try in the same explicit group, with lower confidence
        title_candidates = self._filter_candidates(after_epnum_in_same_explicitgroup())
        if title_candidates:
            confidence = 0.4 if len(title_candidates) == 1 else 0.3
            found_property(title_candidates[0], 'title', confidence=confidence)
            return

        # get the one with the longest value (the first one wins on ties)
        title_candidates = self._filter_candidates(after_epnum_in_same_pathgroup())
        if title_candidates:
            longest = max(title_candidates, key=lambda leaf: len(leaf.clean_value))
            found_property(longest, 'title', confidence=0.3)

    def should_process(self, mtree, options=None):
        """Run only for episode-type guesses and when ``skip_title`` is not set."""
        options = options or {}
        if options.get('skip_title'):
            return False
        return mtree.guess.get('type', '').startswith('episode')

    def _filter_candidates(self, candidates):
        """Drop the candidates matched by the ``guess_episode_special``
        transformer's property container; return them all if that
        transformer is not available."""
        episode_special_transformer = get_transformer('guess_episode_special')
        if not episode_special_transformer:
            return candidates

        container = episode_special_transformer.container
        return [n for n in candidates
                if not container.find_properties(n.value, n, re_match=True)]

    def process(self, mtree, options=None):
        """
        try to identify the remaining unknown groups by looking at their
        position relative to other known elements
        """
        episode_leaves = [node for node in mtree.leaves() if 'episodeNumber' in node.guess]
        if episode_leaves:
            self.match_from_epnum_position(mtree, episode_leaves[0])
        else:
            # if we don't have the episode number, but at least 2 groups in the
            # basename, then it's probably series - eptitle
            basename = mtree.node_at((-2,))
            title_candidates = self._filter_candidates(basename.unidentified_leaves())

            if title_candidates:
                # a single candidate is probably the series name; with two or
                # more, the second one is taken as the episode title
                found_property(title_candidates[0], 'series', confidence=0.4)
                if len(title_candidates) >= 2:
                    found_property(title_candidates[1], 'title', confidence=0.4)

        # if we only have 1 remaining valid group in the folder containing the
        # file, then it's likely that it is the series name
        try:
            series_candidates = mtree.node_at((-3,)).unidentified_leaves()
        except ValueError:
            series_candidates = []

        if len(series_candidates) == 1:
            found_property(series_candidates[0], 'series', confidence=0.3)

        # if there's a path group that only contains the season info, then the
        # previous one is most likely the series title (ie: ../series/season X/..)
        season_only = [node for node in mtree.nodes()
                       if 'season' in node.guess and 'episodeNumber' not in node.guess]

        if season_only:
            wanted_group = season_only[0].node_idx[0] - 1
            previous = [node for node in mtree.unidentified_leaves()
                        if node.node_idx[0] == wanted_group]
            if len(previous) == 1:
                found_property(previous[0], 'series', confidence=0.5)

    def post_process(self, mtree, options=None):
        """Apply ``reorder_title`` to every ``series`` guess in the tree."""
        for node in mtree.nodes():
            if 'series' in node.guess:
                node.guess['series'] = reorder_title(node.guess['series'])
|
62
lib/guessit/transfo/guess_episode_special.py
Normal file
62
lib/guessit/transfo/guess_episode_special.py
Normal file
|
@ -0,0 +1,62 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit.plugins.transformers import Transformer
|
||||
from guessit.matcher import found_guess
|
||||
from guessit.containers import PropertiesContainer
|
||||
|
||||
|
||||
class GuessEpisodeSpecial(Transformer):
    """Transformer that tags 'special' episode markers (Bonus, Pilot, ...)."""

    def __init__(self):
        Transformer.__init__(self, -205)

        specials = PropertiesContainer()
        specials.register_property('special', 'Special', 'Bonus', 'Omake', 'Ova', 'Oav', 'Pilot', 'Unaired')
        specials.register_property('special', 'Extras?', canonical_form='Extras')
        self.container = specials

    def guess_special(self, string, node=None, options=None):
        """Return the 'special' guesses found in ``string``.

        ``options`` is accepted for interface compatibility and is not read.
        """
        matches = self.container.find_properties(string, node, 'special', multiple=True)
        return self.container.as_guess(matches, multiple=True)

    def second_pass_options(self, mtree, options=None):
        """Ask for an 'episode' second pass when a special marker is found.

        Only applies when the current type does not already start with
        'episode'. Returns ``{'type': 'episode'}`` as soon as one unidentified
        leaf yields a 'special' guess, otherwise ``None``.
        """
        if mtree.guess.get('type', '').startswith('episode'):
            return None

        for leaf in mtree.unidentified_leaves():
            matches = self.container.find_properties(leaf.value, leaf, 'special')
            if self.container.as_guess(matches):
                return {'type': 'episode'}
        return None

    def supported_properties(self):
        """Return the property names registered in the container."""
        return self.container.get_supported_properties()

    def process(self, mtree, options=None):
        """Attach 'special' guesses to title leaves, then to unidentified leaves.

        Runs only for episode-like types that either have no episode number
        or are in season 0.
        """
        is_episode = mtree.guess.get('type', '').startswith('episode')
        if is_episode and (not mtree.info.get('episodeNumber') or mtree.info.get('season') == 0):
            # title leaves are handled first; the unidentified leaves are
            # only requested from the tree once that loop is finished
            for leaf in mtree.leaves_containing('title'):
                for special in self.guess_special(leaf.value, leaf, options):
                    found_guess(leaf, special, update_guess=False)

            for leaf in mtree.unidentified_leaves():
                for special in self.guess_special(leaf.value, leaf, options):
                    found_guess(leaf, special, update_guess=False)
        return None
|
80
lib/guessit/transfo/guess_episodes_rexps.py
Normal file
80
lib/guessit/transfo/guess_episodes_rexps.py
Normal file
|
@ -0,0 +1,80 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit.plugins.transformers import Transformer
|
||||
from guessit.matcher import GuessFinder
|
||||
from guessit.patterns import sep
|
||||
from guessit.containers import PropertiesContainer, WeakValidator, NoValidator
|
||||
from guessit.patterns.numeral import numeral, digital_numeral, parse_numeral
|
||||
from re import split as re_split
|
||||
|
||||
|
||||
class GuessEpisodesRexps(Transformer):
    """Transformer that finds season / episode numbers with regular expressions."""

    def __init__(self):
        Transformer.__init__(self, 20)

        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)

        def episode_parser(value):
            """Turn a matched episode string into a number or an episode list.

            ``value`` is first split on ASCII letters; each remaining piece is
            either a single numeral or a dash-separated chain of numerals.
            A chain is expanded into the inclusive ranges between consecutive
            bounds (``01-03`` -> 1, 2, 3 and ``01-02-04`` -> 1, 2, 3, 4).

            Returns ``None`` when nothing was parsed, the single number when
            exactly one was found, and otherwise a dict holding the first
            number under ``None`` and the whole list under ``'episodeList'``.
            """
            ret = []
            for letters_elt in (x for x in re_split('[a-zA-Z]', value) if x):
                dashed_values = [x for x in letters_elt.split('-') if x]
                if len(dashed_values) > 1:
                    bounds = [parse_numeral(x) for x in dashed_values]
                    # Walk consecutive (start, end) pairs. The previous code
                    # always re-read the first two bounds, so chains of three
                    # or more values repeated the first range and never
                    # reached the later bounds.
                    shared_bound_emitted = False
                    for start_dash_ep, end_dash_ep in zip(bounds, bounds[1:]):
                        # the start of this pair is the end of the previous
                        # one; do not list it twice if it was already added
                        first = start_dash_ep + 1 if shared_bound_emitted else start_dash_ep
                        ret.extend(range(first, end_dash_ep + 1))
                        shared_bound_emitted = start_dash_ep <= end_dash_ep
                else:
                    ret.append(parse_numeral(letters_elt))

            if len(ret) > 1:
                return {None: ret[0], 'episodeList': ret}  # TODO: Should support seasonList also
            elif len(ret) > 0:
                return ret[0]
            else:
                return None

        self.container.register_property(None, r'((?:season|saison)' + sep + '?(?P<season>' + numeral + '))', confidence=1.0, formatter=parse_numeral)
        self.container.register_property(None, r'(s(?P<season>' + digital_numeral + ')[^0-9]?' + sep + '?(?P<episodeNumber>(?:e' + digital_numeral + '(?:' + sep + '?[e-]' + digital_numeral + ')*)))[^0-9]', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser}, validator=NoValidator())
        self.container.register_property(None, r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<episodeNumber>(?:x' + digital_numeral + '(?:' + sep + '?[x-]' + digital_numeral + ')*)))[^0-9]', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser})
        self.container.register_property(None, r'(s(?P<season>' + digital_numeral + '))[^0-9]', confidence=0.6, formatter=parse_numeral, validator=NoValidator())
        self.container.register_property(None, r'((?P<episodeNumber>' + digital_numeral + ')v[23])', confidence=0.6, formatter=parse_numeral)
        self.container.register_property(None, r'((?:ep)' + sep + r'(?P<episodeNumber>' + numeral + '))[^0-9]', confidence=0.7, formatter=parse_numeral)
        self.container.register_property(None, r'(e(?P<episodeNumber>' + digital_numeral + '))', confidence=0.6, formatter=parse_numeral)

        self.container.register_canonical_properties('other', 'FiNAL', 'Complete', validator=WeakValidator())

    def supported_properties(self):
        """Return the property names this transformer reports."""
        return ['episodeNumber', 'season']

    def guess_episodes_rexps(self, string, node=None, options=None):
        """Return the guess built from the registered patterns found in ``string``.

        ``options`` is accepted for interface compatibility and is not read.
        """
        found = self.container.find_properties(string, node)
        return self.container.as_guess(found, string)

    def should_process(self, mtree, options=None):
        """Run only when the tree's type starts with 'episode'."""
        return mtree.guess.get('type', '').startswith('episode')

    def process(self, mtree, options=None):
        """Apply ``guess_episodes_rexps`` to every unidentified leaf."""
        GuessFinder(self.guess_episodes_rexps, None, self.log, options).process_nodes(mtree.unidentified_leaves())
|
213
lib/guessit/transfo/guess_filetype.py
Normal file
213
lib/guessit/transfo/guess_filetype.py
Normal file
|
@ -0,0 +1,213 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
import mimetypes
|
||||
import os.path
|
||||
import re
|
||||
|
||||
from guessit.guess import Guess
|
||||
from guessit.patterns.extension import subtitle_exts, info_exts, video_exts
|
||||
from guessit.transfo import TransformerException
|
||||
from guessit.plugins.transformers import Transformer, get_transformer
|
||||
from guessit.matcher import log_found_guess, found_guess
|
||||
from guessit.textutils import clean_string
|
||||
|
||||
|
||||
class GuessFiletype(Transformer):
    """Transformer that decides the overall type of the file being guessed.

    The type is one of 'movie', 'episode', 'subtitle', 'info', a combined
    form such as 'episodesubtitle' or 'movieinfo', or 'unknown'.
    """

    def __init__(self):
        Transformer.__init__(self, 250)

    # List of well known movies and series, hardcoded because they cannot be
    # guessed appropriately otherwise
    MOVIES = ['OSS 117']
    SERIES = ['Band of Brothers']

    # lowercased once so they can be compared against the cleaned filename
    MOVIES = [m.lower() for m in MOVIES]
    SERIES = [s.lower() for s in SERIES]

    def guess_filetype(self, mtree, options=None):
        """Work out the file type of ``mtree``.

        Heuristics are tried in this order, and the first conclusive one wins:
        file extension, well-known folder names, the hardcoded MOVIES / SERIES
        lists, strong episode patterns, episode-specific properties, weak
        episode patterns (TV / web sources only) and website names.

        :param mtree: match tree for the filename
        :param options: optional dict; only ``name_only`` is read here
        :return: tuple ``(filetype, other)`` where ``other`` is a dict holding
            either ``container`` or ``extension`` (or nothing)
        """
        options = options or {}

        # put the filetype inside a dummy container to be able to have the
        # following functions work correctly as closures
        # this is a workaround for python 2 which doesn't have the
        # 'nonlocal' keyword which we could use here in the upgrade_* functions
        # (python 3 does have it)
        filetype_container = [mtree.guess.get('type')]
        other = {}
        filename = mtree.string

        def upgrade_episode():
            if filetype_container[0] == 'subtitle':
                filetype_container[0] = 'episodesubtitle'
            elif filetype_container[0] == 'info':
                filetype_container[0] = 'episodeinfo'
            elif not filetype_container[0]:
                filetype_container[0] = 'episode'

        def upgrade_movie():
            if filetype_container[0] == 'subtitle':
                filetype_container[0] = 'moviesubtitle'
            elif filetype_container[0] == 'info':
                filetype_container[0] = 'movieinfo'
            elif not filetype_container[0]:
                filetype_container[0] = 'movie'

        def upgrade_subtitle():
            if filetype_container[0] == 'movie':
                filetype_container[0] = 'moviesubtitle'
            elif filetype_container[0] == 'episode':
                filetype_container[0] = 'episodesubtitle'
            elif not filetype_container[0]:
                filetype_container[0] = 'subtitle'

        def upgrade_info():
            if filetype_container[0] == 'movie':
                filetype_container[0] = 'movieinfo'
            elif filetype_container[0] == 'episode':
                filetype_container[0] = 'episodeinfo'
            elif not filetype_container[0]:
                filetype_container[0] = 'info'

        # look at the extension first
        fileext = os.path.splitext(filename)[1][1:].lower()
        if fileext in subtitle_exts:
            upgrade_subtitle()
            other = {'container': fileext}
        elif fileext in info_exts:
            upgrade_info()
            other = {'container': fileext}
        elif fileext in video_exts:
            other = {'container': fileext}
        else:
            if fileext and not options.get('name_only'):
                other = {'extension': fileext}

        # check whether we are in a 'Movies', 'Tv Shows', ... folder
        folder_rexps = [
            (r'Movies?', upgrade_movie),
            (r'Films?', upgrade_movie),
            (r'Tv[ _-]?Shows?', upgrade_episode),
            (r'Series?', upgrade_episode),
            (r'Episodes?', upgrade_episode),
        ]
        for frexp, upgrade_func in folder_rexps:
            frexp = re.compile(frexp, re.IGNORECASE)
            for pathgroup in mtree.children:
                if frexp.match(pathgroup.value):
                    upgrade_func()
                    return filetype_container[0], other

        # check for a few specific cases which will unintentionally make the
        # following heuristics confused (eg: OSS 117 will look like an episode,
        # season 1, epnum 17, when it is in fact a movie)
        fname = clean_string(filename).lower()
        for m in self.MOVIES:
            if m in fname:
                self.log.debug('Found in exception list of movies -> type = movie')
                upgrade_movie()
                return filetype_container[0], other
        for s in self.SERIES:
            if s in fname:
                self.log.debug('Found in exception list of series -> type = episode')
                upgrade_episode()
                return filetype_container[0], other

        # now look whether there are some specific hints for episode vs movie
        # if we have an episode_rexp (eg: s02e13), it is an episode
        episode_transformer = get_transformer('guess_episodes_rexps')
        if episode_transformer:
            guess = episode_transformer.guess_episodes_rexps(filename)
            if guess:
                self.log.debug('Found guess_episodes_rexps: %s -> type = episode', guess)
                upgrade_episode()
                return filetype_container[0], other

        properties_transformer = get_transformer('guess_properties')
        if properties_transformer:
            # if we have certain properties characteristic of episodes, it is an ep
            found = properties_transformer.container.find_properties(filename, mtree, 'episodeFormat')
            guess = properties_transformer.container.as_guess(found, filename)
            if guess:
                # (a stray double quote was removed from this message)
                self.log.debug('Found characteristic property of episodes: %s', guess)
                upgrade_episode()
                return filetype_container[0], other

            found = properties_transformer.container.find_properties(filename, mtree, 'format')
            guess = properties_transformer.container.as_guess(found, filename)
            if guess and guess['format'] in ('HDTV', 'WEBRip', 'WEB-DL', 'DVB'):
                # Use weak episodes only if TV or WEB source
                weak_episode_transformer = get_transformer('guess_weak_episodes_rexps')
                if weak_episode_transformer:
                    guess = weak_episode_transformer.guess_weak_episodes_rexps(filename)
                    if guess:
                        self.log.debug('Found guess_weak_episodes_rexps: %s -> type = episode', guess)
                        upgrade_episode()
                        return filetype_container[0], other

        website_transformer = get_transformer('guess_website')
        if website_transformer:
            found = website_transformer.container.find_properties(filename, mtree, 'website')
            guess = website_transformer.container.as_guess(found, filename)
            if guess:
                for namepart in ('tv', 'serie', 'episode'):
                    if namepart in guess['website']:
                        # origin-specific type
                        self.log.debug('Found characteristic property of episodes: %s', guess)
                        upgrade_episode()
                        return filetype_container[0], other

        if filetype_container[0] in ('subtitle', 'info') or (not filetype_container[0] and fileext in video_exts):
            # if no episode info found, assume it's a movie
            self.log.debug('Nothing characteristic found, assuming type = movie')
            upgrade_movie()

        if not filetype_container[0]:
            self.log.debug('Nothing characteristic found, assuming type = unknown')
            filetype_container[0] = 'unknown'

        return filetype_container[0], other

    def process(self, mtree, options=None):
        """guess the file type now (will be useful later)

        Stores the type on the tree's guess, then attaches the container /
        extension and mimetype information to the last top-level node.

        :raises TransformerException: if the type is still unknown and the
            ``name_only`` option is not set
        """
        # ``options`` defaults to None; normalise it so the ``name_only``
        # lookup at the end cannot fail with AttributeError
        options = options or {}

        filetype, other = self.guess_filetype(mtree, options)

        mtree.guess.set('type', filetype, confidence=1.0)
        log_found_guess(mtree.guess)

        filetype_info = Guess(other, confidence=1.0)
        # guess the mimetype of the filename
        # TODO: handle other mimetypes not found on the default type_maps
        # mimetypes.types_map['.srt']='text/subtitle'
        mime, _ = mimetypes.guess_type(mtree.string, strict=False)
        if mime is not None:
            filetype_info.update({'mimetype': mime}, confidence=1.0)

        node_ext = mtree.node_at((-1,))
        found_guess(node_ext, filetype_info)

        if mtree.guess.get('type') in [None, 'unknown']:
            if options.get('name_only'):
                # a bare name with nothing characteristic is assumed to be a movie
                mtree.guess.set('type', 'movie', confidence=0.6)
            else:
                raise TransformerException(__name__, 'Unknown file type')
|
69
lib/guessit/transfo/guess_idnumber.py
Normal file
69
lib/guessit/transfo/guess_idnumber.py
Normal file
|
@ -0,0 +1,69 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit.plugins.transformers import Transformer
|
||||
from guessit.matcher import GuessFinder
|
||||
import re
|
||||
|
||||
|
||||
class GuessIdnumber(Transformer):
    """Transformer that spots long hash-like identifiers in unidentified leaves."""

    def __init__(self):
        Transformer.__init__(self, -180)

    def supported_properties(self):
        """Return the property names this transformer reports."""
        return ['idNumber']

    _idnum = re.compile(r'(?P<idNumber>[a-zA-Z0-9-]{20,})')  # 1.0, (0, 0))

    def guess_idnumber(self, string, node=None, options=None):
        """Look for an identifier-like run of 20 or more characters.

        The candidate is accepted only when the character class (digit,
        letter, other) changes on more than 40% of its characters, which is
        more likely for hash values than for common words.

        Returns ``(groupdict, span)`` on success and ``(None, None)``
        otherwise. ``node`` and ``options`` are not read.
        """
        found = self._idnum.search(string)
        if found is None:
            return None, None

        result = found.groupdict()
        candidate = result['idNumber']

        DIGIT, LETTER, OTHER = 0, 1, 2

        def kind_of(char):
            if char in '0123456789':
                return DIGIT
            if char in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
                return LETTER
            return OTHER

        kinds = [kind_of(char) for char in candidate]
        # the scan behaves as if a letter preceded the first character
        preceding = [LETTER] + kinds[:-1]
        switch_count = sum(1 for before, current in zip(preceding, kinds) if before != current)

        switch_ratio = float(switch_count) / len(candidate)
        if switch_ratio > 0.4:
            return result, found.span()

        return None, None

    def process(self, mtree, options=None):
        """Apply ``guess_idnumber`` to every unidentified leaf."""
        finder = GuessFinder(self.guess_idnumber, 0.4, self.log, options)
        finder.process_nodes(mtree.unidentified_leaves())
|
169
lib/guessit/transfo/guess_language.py
Normal file
169
lib/guessit/transfo/guess_language.py
Normal file
|
@ -0,0 +1,169 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit.language import search_language, subtitle_prefixes, subtitle_suffixes
|
||||
from guessit.patterns.extension import subtitle_exts
|
||||
from guessit.textutils import clean_string, find_words
|
||||
from guessit.plugins.transformers import Transformer
|
||||
from guessit.matcher import GuessFinder
|
||||
|
||||
|
||||
class GuessLanguage(Transformer):
    """Transformer that detects languages and subtitle languages."""

    def __init__(self):
        Transformer.__init__(self, 30)

    def supported_properties(self):
        """Return the property names this transformer reports."""
        return ['language', 'subtitleLanguage']

    def guess_language(self, string, node=None, options=None):
        """Return whatever ``search_language`` finds in ``string``.

        ``node`` and ``options`` are accepted for interface compatibility and
        are not read.
        """
        guess = search_language(string)
        return guess

    def _skip_language_on_second_pass(self, mtree, node):
        """Check if found node is a valid language node, or if it's a false positive.

        The node is considered a false positive when it directly follows a
        title/series leaf and is followed by an unidentified leaf or a year,
        or when it directly precedes a title/series leaf and sits at the
        start of the string or right after an unidentified leaf or a year.

        :param mtree: Tree detected on first pass.
        :type mtree: :class:`guessit.matchtree.MatchTree`
        :param node: Node that contains a language Guess
        :type node: :class:`guessit.matchtree.MatchTree`

        :return: True if a second pass skipping this node is required
        :rtype: bool
        """
        # only the span boundaries matter here, so plain sets are enough
        unidentified_starts = set()
        unidentified_ends = set()
        for unidentified_node in mtree.unidentified_leaves():
            unidentified_starts.add(unidentified_node.span[0])
            unidentified_ends.add(unidentified_node.span[1])

        property_starts = set()
        property_ends = set()
        for property_node in mtree.leaves_containing('year'):
            property_starts.add(property_node.span[0])
            property_ends.add(property_node.span[1])

        title_starts = set()
        title_ends = set()
        for title_node in mtree.leaves_containing(['title', 'series']):
            title_starts.add(title_node.span[0])
            title_ends.add(title_node.span[1])

        start, end = node.span[0], node.span[1]

        follows_title = start in title_ends and (
            end in unidentified_starts or end + 1 in property_starts)
        precedes_title = end in title_starts and (
            start == 0 or start in unidentified_ends or start in property_ends)
        return follows_title or precedes_title

    def second_pass_options(self, mtree, options=None):
        """Collect language nodes that a second pass should skip.

        A node is skipped when it looks like it split the title, or when the
        same language was found more than once (only the most confident
        occurrence is kept).

        :return: ``{'skip_nodes': [...]}`` or ``None`` if nothing is to skip
        """
        m = mtree.matched()
        to_skip_language_nodes = []

        for lang_key in ('language', 'subtitleLanguage'):
            langs = {}
            lang_nodes = set(mtree.leaves_containing(lang_key))

            for lang_node in lang_nodes:
                lang = lang_node.guess.get(lang_key, None)
                if self._skip_language_on_second_pass(mtree, lang_node):
                    # Language probably split the title. Add to skip for 2nd pass.

                    # if filetype is subtitle and the language appears last, just before
                    # the extension, then it is likely a subtitle language
                    parts = clean_string(lang_node.root.value).split()
                    # the membership test keeps ``index`` from raising
                    # ValueError when the raw node value is not one of the
                    # cleaned words
                    if (m.get('type') in ['moviesubtitle', 'episodesubtitle'] and
                            lang_node.value in parts and
                            parts.index(lang_node.value) == len(parts) - 2):
                        continue

                    to_skip_language_nodes.append(lang_node)
                elif lang not in langs:
                    langs[lang] = lang_node
                else:
                    # The same language was found. Keep the more confident one,
                    # and add others to skip for 2nd pass.
                    # The confidence is read for the property currently being
                    # examined (it used to be 'language' for both keys).
                    existing_lang_node = langs[lang]
                    if (existing_lang_node.guess.confidence(lang_key) >=
                            lang_node.guess.confidence(lang_key)):
                        # lang_node is to remove
                        to_skip = lang_node
                    else:
                        # existing_lang_node is to remove
                        langs[lang] = lang_node
                        to_skip = existing_lang_node
                    to_skip_language_nodes.append(to_skip)

        if to_skip_language_nodes:
            return {'skip_nodes': to_skip_language_nodes}
        return None

    def should_process(self, mtree, options=None):
        """Run unless the ``nolanguage`` option is present."""
        options = options or {}
        return 'nolanguage' not in options

    def process(self, mtree, options=None):
        """Apply ``guess_language`` to every unidentified leaf."""
        GuessFinder(self.guess_language, None, self.log, options).process_nodes(mtree.unidentified_leaves())

    def promote_subtitle(self, node):
        """Move the node's ``language`` guess to ``subtitleLanguage``.

        Does nothing when the node has no ``language`` entry. ``post_process``
        may call this more than once for the same node, and the entry is
        removed by the first call.
        """
        if 'language' not in node.guess:
            return
        node.guess.set('subtitleLanguage', node.guess['language'],
                       confidence=node.guess.confidence('language'))
        del node.guess['language']

    def post_process(self, mtree, options=None):
        """Promote languages to subtitle languages where it makes sense."""
        # 1- try to promote language to subtitle language where it makes sense
        for node in mtree.nodes():
            if 'language' not in node.guess:
                continue

            # - if we matched a language in a file with a sub extension and that
            #   the group is the last group of the filename, it is probably the
            #   language of the subtitle
            #   (eg: 'xxx.english.srt')
            if (mtree.node_at((-1,)).value.lower() in subtitle_exts and
                    node == mtree.leaves()[-2]):
                self.promote_subtitle(node)

            # - if we find in the same explicit group
            #   a subtitle prefix before the language,
            #   or a subtitle suffix after the language,
            #   then upgrade the language
            explicit_group = mtree.node_at(node.node_idx[:2])
            group_str = explicit_group.value.lower()

            for sub_prefix in subtitle_prefixes:
                if (sub_prefix in find_words(group_str) and
                        0 <= group_str.find(sub_prefix) < (node.span[0] - explicit_group.span[0])):
                    self.promote_subtitle(node)

            for sub_suffix in subtitle_suffixes:
                if (sub_suffix in find_words(group_str) and
                        (node.span[0] - explicit_group.span[0]) < group_str.find(sub_suffix)):
                    self.promote_subtitle(node)

            # - if a language is in an explicit group just preceded by "st",
            #   it is a subtitle language (eg: '...st[fr-eng]...')
            try:
                idx = node.node_idx
                previous = mtree.node_at((idx[0], idx[1] - 1)).leaves()[-1]
                if previous.value.lower()[-2:] == 'st':
                    self.promote_subtitle(node)
            except IndexError:
                pass
|
177
lib/guessit/transfo/guess_movie_title_from_position.py
Normal file
177
lib/guessit/transfo/guess_movie_title_from_position.py
Normal file
|
@ -0,0 +1,177 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit.plugins.transformers import Transformer
|
||||
from guessit.matcher import found_property
|
||||
from guessit import u
|
||||
|
||||
|
||||
class GuessMovieTitleFromPosition(Transformer):
    """Transformer that picks a movie title from where leftover groups sit."""

    def __init__(self):
        Transformer.__init__(self, -200)

    def supported_properties(self):
        """Return the property names this transformer reports."""
        return ['title']

    def should_process(self, mtree, options=None):
        """Run unless ``skip_title`` is set or the type starts with 'episode'."""
        opts = options or {}
        if opts.get('skip_title'):
            return False
        return not mtree.guess.get('type', '').startswith('episode')

    def process(self, mtree, options=None):
        """
        try to identify the remaining unknown groups by looking at their
        position relative to other known elements

        The rules below are tried in order; the first one that applies marks
        a leaf as the title and ends the method.
        """
        file_node = mtree.node_at((-2,))
        file_candidates = file_node.unidentified_leaves(valid=lambda leaf: len(leaf.clean_value) > 0)

        try:
            dir_node = mtree.node_at((-3,))
            dir_candidates = dir_node.unidentified_leaves()
        except ValueError:
            # no containing folder to look at
            dir_node = None
            dir_candidates = []

        self.log.debug('folder: %s' % u(dir_candidates))
        self.log.debug('basename: %s' % u(file_candidates))

        # rule 1: the same group opens both the folder name and the file
        # name, which makes it a good title candidate
        if dir_candidates and file_candidates:
            if dir_candidates[0].clean_value == file_candidates[0].clean_value:
                found_property(dir_candidates[0], 'title', confidence=0.8)
                return

        # rule 2: the basename starts with a number followed by an
        # unidentified group while the folder holds one unidentified group:
        # the folder names a film series
        # ex: Millenium Trilogy (2009)/(1)The Girl With The Dragon Tattoo(2009).mkv
        try:
            saga_leaf = dir_candidates[0]
            number_leaf = file_candidates[0]
            name_leaf = file_candidates[1]

            file_leaves = file_node.leaves()

            film_index = int(number_leaf.clean_value)

            self.log.debug('series: %s' % saga_leaf.clean_value)
            self.log.debug('title: %s' % name_leaf.clean_value)

            distinct = (saga_leaf.clean_value != name_leaf.clean_value and
                        saga_leaf.clean_value != number_leaf.clean_value)
            if (distinct and
                    file_leaves.index(number_leaf) == 0 and
                    file_leaves.index(name_leaf) == 1):
                found_property(name_leaf, 'title', confidence=0.6)
                found_property(saga_leaf, 'filmSeries', confidence=0.6)
                found_property(number_leaf, 'filmNumber', film_index, confidence=0.6)
                return
        except Exception:
            # best effort: any failure just means this rule does not apply
            pass

        # rule 3: movies/tttttt (yyyy)/tttttt.ccc
        try:
            if mtree.node_at((-4, 0)).value.lower() == 'movies':
                dir_node = mtree.node_at((-3,))

                # Note: a more generic rule (taking the whole folder name as
                # title whenever it is an unguessed leaf) was judged too
                # broad, since all the unit test paths contain 'movies'

                year_leaf = dir_node.first_leaf_containing('year')
                before_year = dir_node.previous_unidentified_leaves(year_leaf)

                found_property(before_year[0], 'title', confidence=0.8)
                return
        except Exception:
            # best effort: any failure just means this rule does not apply
            pass

        # rule 4: if we have either format or videoCodec in the folder
        # containing the file or one of its parents, then we should probably
        # look for the title in there rather than in the basename
        try:
            hints = mtree.previous_leaves_containing(mtree.children[-2],
                                                     ['videoCodec', 'format',
                                                      'language'])
        except IndexError:
            hints = []

        if hints:
            first_group = hints[0].node_idx[0]
            if all(hint.node_idx[0] == first_group for hint in hints):
                # if they're all in the same group, take leftover info from there
                remaining = mtree.node_at((first_group,)).unidentified_leaves()

                if remaining:
                    found_property(remaining[0], 'title', confidence=0.7)
                    return

        # rule 5: look for title in basename if there are some remaining
        # unidentified groups there
        if file_candidates:
            head = file_candidates[0]

            # if basename is only one word and the containing folder has at least
            # 3 words in it, we should take the title from the folder name
            # ex: Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi
            # ex: Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi <-- TODO: gets caught here?
            if (head.clean_value.count(' ') == 0 and
                    dir_candidates and
                    dir_candidates[0].clean_value.count(' ') >= 2):
                found_property(dir_candidates[0], 'title', confidence=0.7)
                return

            # if there are several unidentified groups, take the first one
            # which is not inside brackets or parentheses.
            # ex: Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi
            if head.is_explicit():
                for candidate in file_candidates:
                    if not candidate.is_explicit():
                        found_property(candidate, 'title', confidence=0.8)
                        return

            # if all else fails, take the first remaining unidentified group in the
            # basename as title
            found_property(head, 'title', confidence=0.6)
            return

        # rule 6: no leftover groups in the basename, look in the folder name
        if dir_candidates:
            found_property(dir_candidates[0], 'title', confidence=0.5)
            return

        # rule 7: if nothing worked, look if we have a very small group at
        # the beginning of the basename (every leaf is accepted this time)
        file_node = mtree.node_at((-2,))
        any_leftover = file_node.unidentified_leaves(valid=lambda leaf: True)
        if any_leftover:
            found_property(any_leftover[0], 'title', confidence=0.4)
            return
|
230
lib/guessit/transfo/guess_properties.py
Normal file
230
lib/guessit/transfo/guess_properties.py
Normal file
|
@ -0,0 +1,230 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit.containers import PropertiesContainer, WeakValidator, LeavesValidator, QualitiesContainer
|
||||
from guessit.patterns.extension import subtitle_exts, video_exts, info_exts
|
||||
from guessit.plugins.transformers import Transformer
|
||||
from guessit.matcher import GuessFinder
|
||||
|
||||
|
||||
class GuessProperties(Transformer):
    """Transformer that looks for technical release properties in a string.

    The constructor fills a ``PropertiesContainer`` with patterns for the
    ``container``, ``format``, ``screenSize``, ``videoCodec``,
    ``videoProfile``, ``videoApi``, ``audioCodec``, ``audioProfile``,
    ``audioChannels``, ``episodeFormat`` and ``other`` properties, and a
    ``QualitiesContainer`` with a numeric rating per canonical value.
    """

    def __init__(self):
        Transformer.__init__(self, 35)

        self.container = PropertiesContainer()
        self.qualities = QualitiesContainer()

        def register_property(propname, props):
            """Register each entry of ``props`` under ``propname``.

            ``props`` is a dict of ``{canonical_form: patterns}`` where
            ``patterns`` is either a list of patterns, or a
            ``(patterns, kwargs)`` tuple whose kwargs are forwarded to the
            container's ``register_property``.
            """
            for canonical_form, patterns in props.items():
                if isinstance(patterns, tuple):
                    patterns2, kwargs = patterns
                    # copy so the caller's dict is not modified
                    kwargs = dict(kwargs)
                    kwargs['canonical_form'] = canonical_form
                    self.container.register_property(propname, *patterns2, **kwargs)
                else:
                    self.container.register_property(propname, *patterns, canonical_form=canonical_form)

        def register_quality(propname, quality_dict):
            """Register ``quality_dict``, a dict of {canonical_form: quality}."""
            for canonical_form, quality in quality_dict.items():
                self.qualities.register_quality(propname, canonical_form, quality)

        def video_codec_validator():
            """Return a new LeavesValidator testing ``'videoCodec' in node.guess``."""
            return LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess])

        def audio_codec_validator(codec):
            """Return a new LeavesValidator testing ``node.guess.get('audioCodec') == codec``."""
            return LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == codec])

        register_property('container', {'mp4': ['MP4']})

        # http://en.wikipedia.org/wiki/Pirated_movie_release_types
        register_property('format', {'VHS': ['VHS'],
                                     'Cam': ['CAM', 'CAMRip'],
                                     'Telesync': ['TELESYNC', 'PDVD'],
                                     'Workprint': ['WORKPRINT', 'WP'],
                                     'Telecine': ['TELECINE', 'TC'],
                                     'PPV': ['PPV', 'PPV-Rip'],  # Pay Per View
                                     'TV': ['SD-TV', 'SD-TV-Rip', 'Rip-SD-TV', 'TV-Rip', 'Rip-TV'],
                                     'DVB': ['DVB-Rip', 'DVB', 'PD-TV'],
                                     'DVD': ['DVD', 'DVD-Rip', 'VIDEO-TS'],
                                     'HDTV': ['HD-TV', 'TV-RIP-HD', 'HD-TV-RIP'],
                                     'VOD': ['VOD', 'VOD-Rip'],
                                     'WEBRip': ['WEB-Rip'],
                                     'WEB-DL': ['WEB-DL'],
                                     'HD-DVD': ['HD-(?:DVD)?-Rip', 'HD-DVD'],
                                     'BluRay': ['Blu-ray', 'B[DR]', 'B[DR]-Rip', 'BD[59]', 'BD25', 'BD50']
                                     })

        # The low-confidence 'TS' alias is registered on its own: a dict
        # literal cannot hold the 'Telesync' key twice (the last value would
        # silently replace the 'TELESYNC'/'PDVD' patterns above).
        register_property('format', {'Telesync': (['TS'], {'confidence': 0.2})})

        register_quality('format', {'VHS': -100,
                                    'Cam': -90,
                                    'Telesync': -80,
                                    'Workprint': -70,
                                    'Telecine': -60,
                                    'PPV': -50,
                                    'TV': -30,
                                    'DVB': -20,
                                    'DVD': 0,
                                    'HDTV': 20,
                                    'VOD': 40,
                                    'WEBRip': 50,
                                    'WEB-DL': 60,
                                    'HD-DVD': 80,
                                    'BluRay': 100
                                    })

        # NOTE(review): these are non-raw strings, so '\\|' reaches the regex
        # engine as '\|' (an escaped pipe) -- confirm whether a literal
        # backslash alternative was intended. Patterns are left untouched.
        register_property('screenSize', {'360p': ['(?:\d{3,}(?:\\|\/|x|\*))?360(?:i|p?x?)'],
                                         '368p': ['(?:\d{3,}(?:\\|\/|x|\*))?368(?:i|p?x?)'],
                                         '480p': ['(?:\d{3,}(?:\\|\/|x|\*))?480(?:i|p?x?)'],
                                         '576p': ['(?:\d{3,}(?:\\|\/|x|\*))?576(?:i|p?x?)'],
                                         '720p': ['(?:\d{3,}(?:\\|\/|x|\*))?720(?:i|p?x?)'],
                                         '900p': ['(?:\d{3,}(?:\\|\/|x|\*))?900(?:i|p?x?)'],
                                         '1080i': ['(?:\d{3,}(?:\\|\/|x|\*))?1080i'],
                                         '1080p': ['(?:\d{3,}(?:\\|\/|x|\*))?1080(?:p?x?)'],
                                         '4K': ['(?:\d{3,}(?:\\|\/|x|\*))?2160(?:i|p?x?)']
                                         })

        # Same duplicate-key issue as 'Telesync': the low-confidence 'hr'
        # alias must not replace the 480 resolution pattern.
        register_property('screenSize', {'480p': (['hr'], {'confidence': 0.2})})

        register_quality('screenSize', {'360p': -300,
                                        '368p': -200,
                                        '480p': -100,
                                        '576p': 0,
                                        '720p': 100,
                                        '900p': 130,
                                        '1080i': 180,
                                        '1080p': 200,
                                        '4K': 400
                                        })

        _videoCodecProperty = {'Real': ['Rv\d{2}'],  # http://en.wikipedia.org/wiki/RealVideo
                               'Mpeg2': ['Mpeg2'],
                               'DivX': ['DVDivX', 'DivX'],
                               'XviD': ['XviD'],
                               'h264': ['[hx]-264(?:-AVC)?', 'MPEG-4(?:-AVC)'],
                               'h265': ['[hx]-265(?:-HEVC)?', 'HEVC']
                               }

        register_property('videoCodec', _videoCodecProperty)

        register_quality('videoCodec', {'Real': -50,
                                        'Mpeg2': -30,
                                        'DivX': -10,
                                        'XviD': 0,
                                        'h264': 100,
                                        'h265': 150
                                        })

        # http://blog.mediacoderhq.com/h264-profiles-and-levels/
        # http://fr.wikipedia.org/wiki/H.264
        self.container.register_property('videoProfile', 'BP', validator=video_codec_validator())
        self.container.register_property('videoProfile', 'XP', 'EP', canonical_form='XP', validator=video_codec_validator())
        self.container.register_property('videoProfile', 'MP', validator=video_codec_validator())
        self.container.register_property('videoProfile', 'HP', 'HiP', canonical_form='HP', validator=video_codec_validator())
        self.container.register_property('videoProfile', '10.?bit', 'Hi10P', canonical_form='10bit', validator=video_codec_validator())
        self.container.register_property('videoProfile', 'Hi422P', validator=video_codec_validator())
        self.container.register_property('videoProfile', 'Hi444PP', validator=video_codec_validator())

        register_quality('videoProfile', {'BP': -20,
                                          'XP': -10,
                                          'MP': 0,
                                          'HP': 10,
                                          '10bit': 15,
                                          'Hi422P': 25,
                                          'Hi444PP': 35
                                          })

        # has nothing to do here (or on filenames for that matter), but some
        # releases use it and it helps to identify release groups, so we adapt
        register_property('videoApi', {'DXVA': ['DXVA']})

        register_property('audioCodec', {'MP3': ['MP3'],
                                         'DolbyDigital': ['DD'],
                                         'AAC': ['AAC'],
                                         'AC3': ['AC3'],
                                         'Flac': ['FLAC'],
                                         'DTS': ['DTS'],
                                         'TrueHD': ['True-HD']
                                         })

        register_quality('audioCodec', {'MP3': 10,
                                        'DolbyDigital': 30,
                                        'AAC': 35,
                                        'AC3': 40,
                                        'Flac': 45,
                                        'DTS': 60,
                                        'TrueHD': 70
                                        })

        self.container.register_property('audioProfile', 'HD', validator=audio_codec_validator('DTS'))
        self.container.register_property('audioProfile', 'HD-MA', canonical_form='HDMA', validator=audio_codec_validator('DTS'))
        self.container.register_property('audioProfile', 'HE', validator=audio_codec_validator('AAC'))
        self.container.register_property('audioProfile', 'LC', validator=audio_codec_validator('AAC'))
        self.container.register_property('audioProfile', 'HQ', validator=audio_codec_validator('AC3'))

        register_quality('audioProfile', {'HD': 20,
                                          'HDMA': 50,
                                          'LC': 0,
                                          'HQ': 0,
                                          'HE': 20
                                          })

        register_property('audioChannels', {'7.1': ['7[\W_]1', '7ch'],
                                            '5.1': ['5[\W_]1', '5ch'],
                                            '2.0': ['2[\W_]0', '2ch', 'stereo'],
                                            '1.0': ['1[\W_]0', '1ch', 'mono']
                                            })

        register_quality('audioChannels', {'7.1': 200,
                                           '5.1': 100,
                                           '2.0': 0,
                                           '1.0': -100
                                           })

        self.container.register_property('episodeFormat', r'Minisodes?', canonical_form='Minisode')

        register_property('other', {'AudioFix': ['Audio-Fix', 'Audio-Fixed'],
                                    'SyncFix': ['Sync-Fix', 'Sync-Fixed'],
                                    'DualAudio': ['Dual-Audio'],
                                    'WideScreen': ['ws', 'wide-screen'],
                                    })

        self.container.register_property('other', 'Real', 'Fix', canonical_form="Proper", validator=WeakValidator())
        self.container.register_property('other', 'Proper', 'Repack', 'Rerip', canonical_form="Proper")

        self.container.register_canonical_properties('other', 'R5', 'Screener', '3D', 'HD', 'HQ', 'DDC')
        self.container.register_canonical_properties('other', 'Limited', 'Complete', 'Classic', 'Unrated', 'LiNE', 'Bonus', 'Trailer', validator=WeakValidator())

        # every registered 'format' pattern followed by a "Scr"/"Screener"
        # suffix is additionally registered as other=Screener
        for prop in self.container.get_properties('format'):
            self.container.register_property('other', prop.pattern + '(-?Scr(?:eener)?)', canonical_form='Screener')

        # known file extensions are registered as low-confidence containers
        for exts in (subtitle_exts, info_exts, video_exts):
            for ext in exts:
                self.container.register_property('container', ext, confidence=0.3)

    def guess_properties(self, string, node=None, options=None):
        """Return the container's guess for the properties found in ``string``.

        ``options`` is accepted for interface compatibility and not used here.
        """
        found = self.container.find_properties(string, node)
        return self.container.as_guess(found, string)

    def supported_properties(self):
        """Return the properties supported by the underlying container."""
        return self.container.get_supported_properties()

    def process(self, mtree, options=None):
        """Run ``guess_properties`` through a GuessFinder on the unidentified leaves of ``mtree``."""
        GuessFinder(self.guess_properties, 1.0, self.log, options).process_nodes(mtree.unidentified_leaves())

    def rate_quality(self, guess, *props):
        """Delegate to the QualitiesContainer to rate ``guess`` for ``props``."""
        return self.qualities.rate_quality(guess, *props)
|
149
lib/guessit/transfo/guess_release_group.py
Normal file
149
lib/guessit/transfo/guess_release_group.py
Normal file
|
@ -0,0 +1,149 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit.plugins.transformers import Transformer
|
||||
from guessit.matcher import GuessFinder, found_property, found_guess
|
||||
from guessit.containers import PropertiesContainer
|
||||
from guessit.patterns import sep
|
||||
from guessit.guess import Guess
|
||||
from guessit.textutils import strip_brackets
|
||||
|
||||
|
||||
class GuessReleaseGroup(Transformer):
    """Transformer that tries to identify the ``releaseGroup`` property."""

    def __init__(self):
        Transformer.__init__(self, -190)
        self.container = PropertiesContainer(canonical_from_pattern=False)
        self._allowed_groupname_pattern = '[\w@#€£$&]'
        self._forbidden_groupname_lambda = [lambda elt: elt in ['rip', 'by', 'for', 'par', 'pour', 'bonus'],
                                            lambda elt: self._is_number(elt),
                                            ]
        # If the previous property is in this list, the match will be
        # considered as safe and the group name can contain a separator.
        self.previous_safe_properties = ['videoCodec', 'format', 'videoApi', 'audioCodec', 'audioProfile', 'videoProfile', 'audioChannels']

        self.container.sep_replace_char = '-'
        self.container.canonical_from_pattern = False
        self.container.enhance = True

        # a group name is one run of allowed characters, or two runs joined
        # by a dash
        word = self._allowed_groupname_pattern + '+'
        self.container.register_property('releaseGroup', word)
        self.container.register_property('releaseGroup', word + '-' + word)

    def supported_properties(self):
        """Return the properties supported by the underlying container."""
        return self.container.get_supported_properties()

    def _is_number(self, s):
        """Return True when ``int(s)`` succeeds, False on ValueError."""
        try:
            int(s)
        except ValueError:
            return False
        return True

    def validate_group_name(self, guess):
        """Check (and possibly shorten) ``guess['releaseGroup']``.

        Names shorter than two characters are rejected.  A name containing
        dashes is cut at its first forbidden dash-separated element and the
        shortened name is stored back into ``guess``.  Returns True when the
        resulting name is not itself forbidden, False otherwise.
        """
        name = guess['releaseGroup']
        if len(name) < 2:
            return False

        rules = self._forbidden_groupname_lambda

        if '-' in name:
            kept = ""
            for part in name.split('-'):
                lowered = part.lower()
                if any(rule(lowered) for rule in rules):
                    # everything from the first forbidden element on is dropped
                    break
                # the dash is only re-inserted once something has been kept
                if kept:
                    kept += '-'
                kept += part
            name = kept
            if not name:
                return False
            guess['releaseGroup'] = name

        return not any(rule(name.lower()) for rule in rules)

    def is_leaf_previous(self, leaf, node):
        """Return True if ``leaf`` ends before ``node`` starts with only
        separator characters (``sep``) in between."""
        if leaf.span[1] > node.span[0]:
            return False
        return all(leaf.root.value[idx] in sep
                   for idx in range(leaf.span[1], node.span[0]))

    def guess_release_group(self, string, node=None, options=None):
        """Return a validated releaseGroup guess for ``string``, or None.

        ``options`` is accepted for interface compatibility and not used here.
        """
        matches = self.container.find_properties(string, node, 'releaseGroup')
        guess = self.container.as_guess(matches, string, self.validate_group_name, sep_replacement='-')
        validated_guess = None

        if guess:
            explicit_group_node = node.group_node()
            if explicit_group_node:
                for leaf in explicit_group_node.leaves_containing(self.previous_safe_properties):
                    if self.is_leaf_previous(leaf, node):
                        # a dash right after the safe property gives full confidence
                        guess.metadata().confidence = 1 if leaf.root.value[leaf.span[1]] == '-' else 0.7
                        validated_guess = guess

        if not validated_guess:
            # If previous group last leaf is identified as a safe property,
            # consider the raw value as a releaseGroup
            previous_group_node = node.previous_group_node()
            if previous_group_node:
                for leaf in previous_group_node.leaves_containing(self.previous_safe_properties):
                    if self.is_leaf_previous(leaf, node):
                        guess = Guess({'releaseGroup': node.value}, confidence=1, input=node.value, span=(0, len(node.value)))
                        if self.validate_group_name(guess):
                            node.guess = guess
                            validated_guess = guess

        if validated_guess:
            # If following group nodes have only one unidentified leaf, it
            # belongs to the release group
            following = node.next_group_node()
            while following:
                leaves = following.leaves()
                if len(leaves) != 1 or leaves[0].guess:
                    break
                validated_guess['releaseGroup'] = validated_guess['releaseGroup'] + leaves[0].value
                leaves[0].guess = validated_guess
                following = following.next_group_node()

            # Strip brackets
            validated_guess['releaseGroup'] = strip_brackets(validated_guess['releaseGroup'])

        return validated_guess

    def process(self, mtree, options=None):
        """Run ``guess_release_group`` through a GuessFinder on the unidentified leaves of ``mtree``."""
        GuessFinder(self.guess_release_group, None, self.log, options).process_nodes(mtree.unidentified_leaves())
|
58
lib/guessit/transfo/guess_video_rexps.py
Normal file
58
lib/guessit/transfo/guess_video_rexps.py
Normal file
|
@ -0,0 +1,58 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, \
|
||||
unicode_literals
|
||||
|
||||
from guessit.patterns import _psep
|
||||
from guessit.containers import PropertiesContainer
|
||||
from guessit.plugins.transformers import Transformer
|
||||
from guessit.matcher import GuessFinder
|
||||
from guessit.patterns.numeral import parse_numeral
|
||||
|
||||
|
||||
class GuessVideoRexps(Transformer):
    """Transformer registering patterns for cd numbers, bonus/film numbers
    and editions, and guessing them on unidentified leaves."""

    def __init__(self):
        Transformer.__init__(self, 25)

        self.container = PropertiesContainer(canonical_from_pattern=False)

        # "cd <n>" optionally followed by "of <total>"; the property names
        # come from the named groups of the pattern
        self.container.register_property(None, 'cd' + _psep + '(?P<cdNumber>[0-9])(?:' + _psep + 'of' + _psep + '(?P<cdNumberTotal>[0-9]))?', confidence=1.0, enhance=False, global_span=True, formatter=parse_numeral)
        self.container.register_property('cdNumberTotal', '([1-9])' + _psep + 'cds?', confidence=0.9, enhance=False, formatter=parse_numeral)

        self.container.register_property('bonusNumber', 'x([0-9]{1,2})', enhance=False, global_span=True, formatter=parse_numeral)

        self.container.register_property('filmNumber', 'f([0-9]{1,2})', enhance=False, global_span=True, formatter=parse_numeral)

        self.container.register_property('edition', 'collector', 'collector-edition', 'edition-collector', canonical_form='Collector Edition')
        self.container.register_property('edition', 'special-edition', 'edition-special', canonical_form='Special Edition')
        self.container.register_property('edition', 'criterion', 'criterion-edition', 'edition-criterion', canonical_form='Criterion Edition')
        # 'deluxe-edition' was previously misspelled 'cdeluxe-edition', which
        # broke the parallel with the other "<name>-edition" patterns
        self.container.register_property('edition', 'deluxe', 'deluxe-edition', 'edition-deluxe', canonical_form='Deluxe Edition')
        self.container.register_property('edition', 'director\'?s?-cut', 'director\'?s?-cut-edition', 'edition-director\'?s?-cut', canonical_form='Director\'s cut')

    def supported_properties(self):
        """Return the properties supported by the underlying container."""
        return self.container.get_supported_properties()

    def guess_video_rexps(self, string, node=None, options=None):
        """Return the container's guess for the properties found in ``string``.

        ``options`` is accepted for interface compatibility and not used here.
        """
        found = self.container.find_properties(string, node)
        return self.container.as_guess(found, string)

    def process(self, mtree, options=None):
        """Run ``guess_video_rexps`` through a GuessFinder on the unidentified leaves of ``mtree``."""
        GuessFinder(self.guess_video_rexps, None, self.log, options).process_nodes(mtree.unidentified_leaves())
|
69
lib/guessit/transfo/guess_weak_episodes_rexps.py
Normal file
69
lib/guessit/transfo/guess_weak_episodes_rexps.py
Normal file
|
@ -0,0 +1,69 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit.plugins.transformers import Transformer
|
||||
from guessit.matcher import GuessFinder
|
||||
from guessit.patterns import sep
|
||||
from guessit.containers import PropertiesContainer
|
||||
from guessit.patterns.numeral import numeral, parse_numeral
|
||||
from guessit.date import valid_year
|
||||
|
||||
|
||||
class GuessWeakEpisodesRexps(Transformer):
    """Transformer guessing episode (and season) numbers from bare digits
    or an "episode <numeral>" wording."""

    def __init__(self):
        Transformer.__init__(self, 15)

        self.properties = PropertiesContainer(enhance=False, canonical_from_pattern=False)

        def _split_season_episode(episodeNumber):
            """Turn a matched number into an episode, or season + episode.

            Returns None for values accepted by ``valid_year`` and for
            numbers whose leading part would give a season above 50.
            """
            number = parse_numeral(episodeNumber)
            if valid_year(number):
                return None
            if number > 100:
                season, episode = divmod(number, 100)
                # episodes which have a season > 50 are most likely errors
                # (Simpson is at 25!)
                if season > 50:
                    return None
                return {'season': season, 'episodeNumber': episode}
            return number

        self.properties.register_property(['episodeNumber', 'season'], '[0-9]{2,4}', confidence=0.6, formatter=_split_season_episode)
        self.properties.register_property('episodeNumber', '(?:episode)' + sep + '(' + numeral + ')[^0-9]', confidence=0.3)

    def supported_properties(self):
        """Return the properties supported by the underlying container."""
        return self.properties.get_supported_properties()

    def guess_weak_episodes_rexps(self, string, node=None, options=None):
        """Return a guess for ``string``, or None when the root node's info
        already holds an 'episodeNumber'."""
        if node and 'episodeNumber' in node.root.info:
            return None

        found = self.properties.find_properties(string, node)
        return self.properties.as_guess(found, string)

    def should_process(self, mtree, options=None):
        """Return True only when the tree's guessed 'type' starts with 'episode'."""
        guessed_type = mtree.guess.get('type', '')
        return guessed_type.startswith('episode')

    def process(self, mtree, options=None):
        """Run ``guess_weak_episodes_rexps`` through a GuessFinder on the unidentified leaves of ``mtree``."""
        finder = GuessFinder(self.guess_weak_episodes_rexps, 0.6, self.log, options)
        finder.process_nodes(mtree.unidentified_leaves())
|
66
lib/guessit/transfo/guess_website.py
Normal file
66
lib/guessit/transfo/guess_website.py
Normal file
|
@ -0,0 +1,66 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, \
|
||||
unicode_literals
|
||||
|
||||
from guessit.patterns import build_or_pattern
|
||||
from guessit.containers import PropertiesContainer
|
||||
from guessit.plugins.transformers import Transformer
|
||||
from guessit.matcher import GuessFinder
|
||||
from pkg_resources import resource_stream # @UnresolvedImport
|
||||
|
||||
|
||||
class GuessWebsite(Transformer):
    """Transformer guessing the ``website`` property from domain-like text."""

    def __init__(self):
        Transformer.__init__(self, 45)

        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)

        tlds = []

        f = resource_stream('guessit', 'tlds-alpha-by-domain.txt')
        try:
            for tld in f:
                tld = tld.strip()
                # Skip blank lines and '#' header/comment lines by content
                # rather than by position, so a real TLD near the top of the
                # file is not dropped and no empty alternative ends up in the
                # pattern.
                # NOTE(review): the previous code unconditionally skipped the
                # first two lines -- confirm against the data file's layout.
                if not tld or tld.startswith(b'#'):
                    continue
                if b'--' in tld:
                    continue
                tlds.append(tld.decode("utf-8"))
        finally:
            # release the resource even if reading/decoding fails
            f.close()

        tlds_pattern = build_or_pattern(tlds)  # All registered domain extension
        safe_tlds_pattern = build_or_pattern(['com', 'org', 'net'])  # For sure a website extension
        safe_subdomains_pattern = build_or_pattern(['www'])  # For sure a website subdomain
        safe_prefix_tlds_pattern = build_or_pattern(['co', 'com', 'org', 'net'])  # Those words before a tlds are sure

        self.container.register_property('website', '(?:' + safe_subdomains_pattern + '\.)+' + r'(?:[a-z-]+\.)+' + r'(?:' + tlds_pattern + r')+')
        self.container.register_property('website', '(?:' + safe_subdomains_pattern + '\.)*' + r'[a-z-]+\.' + r'(?:' + safe_tlds_pattern + r')+')
        self.container.register_property('website', '(?:' + safe_subdomains_pattern + '\.)*' + r'[a-z-]+\.' + r'(?:' + safe_prefix_tlds_pattern + r'\.)+' + r'(?:' + tlds_pattern + r')+')

    def supported_properties(self):
        """Return the properties supported by the underlying container."""
        return self.container.get_supported_properties()

    def guess_website(self, string, node=None, options=None):
        """Return the container's guess for 'website' matches found in ``string``.

        ``options`` is accepted for interface compatibility and not used here.
        """
        found = self.container.find_properties(string, node, 'website')
        return self.container.as_guess(found, string)

    def process(self, mtree, options=None):
        """Run ``guess_website`` through a GuessFinder on the unidentified leaves of ``mtree``."""
        GuessFinder(self.guess_website, 1.0, self.log, options).process_nodes(mtree.unidentified_leaves())
|
49
lib/guessit/transfo/guess_year.py
Normal file
49
lib/guessit/transfo/guess_year.py
Normal file
|
@ -0,0 +1,49 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit.plugins.transformers import Transformer
|
||||
from guessit.matcher import GuessFinder
|
||||
from guessit.date import search_year
|
||||
|
||||
|
||||
class GuessYear(Transformer):
    """Transformer guessing the ``year`` property via ``search_year``."""

    def __init__(self):
        Transformer.__init__(self, -160)

    def supported_properties(self):
        """Return the list of properties this transformer can produce."""
        return ['year']

    def guess_year(self, string, node=None, options=None):
        """Return ``({'year': year}, span)`` when ``search_year`` finds a
        year in ``string``, else ``(None, None)``."""
        year, span = search_year(string)
        if not year:
            return None, None
        return {'year': year}, span

    def second_pass_options(self, mtree, options=None):
        """When several leaves contain a year, return options asking to skip
        all of them but the last one; otherwise return None."""
        year_nodes = mtree.leaves_containing('year')
        if len(year_nodes) <= 1:
            return None
        return {'skip_nodes': year_nodes[:-1]}

    def process(self, mtree, options=None):
        """Run ``guess_year`` through a GuessFinder on the unidentified leaves of ``mtree``."""
        finder = GuessFinder(self.guess_year, 1.0, self.log, options)
        finder.process_nodes(mtree.unidentified_leaves())
|
49
lib/guessit/transfo/split_explicit_groups.py
Normal file
49
lib/guessit/transfo/split_explicit_groups.py
Normal file
|
@ -0,0 +1,49 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit.plugins.transformers import Transformer
|
||||
from guessit.textutils import find_first_level_groups
|
||||
from guessit.patterns import group_delimiters
|
||||
from functools import reduce
|
||||
|
||||
|
||||
class SplitExplicitGroups(Transformer):
    """Transformer splitting each child of the tree into explicit groups."""

    def __init__(self):
        Transformer.__init__(self, 245)

    def process(self, mtree, options=None):
        """Split every child of ``mtree`` on its first-level groups.

        The child's value is first split with the first entry of
        ``group_delimiters``; the resulting pieces are then split again with
        every entry of ``group_delimiters`` in turn, and the child is split
        on the final list of components.
        """
        for child in mtree.children:
            pieces = find_first_level_groups(child.value, group_delimiters[0])
            for delimiters in group_delimiters:
                pieces = [sub
                          for piece in pieces
                          for sub in find_first_level_groups(piece, delimiters)]

            # do not do this at this moment, it is not strong enough and can
            # break other patterns, such as dates, etc...
            # groups = functools.reduce(lambda l, x: l + x.split('-'), groups, [])

            child.split_on_components(pieces)
|
47
lib/guessit/transfo/split_on_dash.py
Normal file
47
lib/guessit/transfo/split_on_dash.py
Normal file
|
@ -0,0 +1,47 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit.plugins.transformers import Transformer
|
||||
from guessit.patterns import sep
|
||||
import re
|
||||
|
||||
|
||||
class SplitOnDash(Transformer):
    """Transformer partitioning unidentified leaves around dashes."""

    def __init__(self):
        Transformer.__init__(self, 190)

    def process(self, mtree, options=None):
        """Partition each unidentified leaf at every ``sep + '-' + sep`` match.

        The start and end offsets of every match are collected and handed to
        ``node.partition``; leaves without a match are left untouched.
        """
        dash = re.compile(sep + '-' + sep)

        for node in mtree.unidentified_leaves():
            cut_points = []
            for found in dash.finditer(node.value):
                cut_points.extend(found.span())

            if cut_points:
                node.partition(cut_points)
|
45
lib/guessit/transfo/split_path_components.py
Normal file
45
lib/guessit/transfo/split_path_components.py
Normal file
|
@ -0,0 +1,45 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from guessit.plugins.transformers import Transformer
|
||||
from guessit import fileutils
|
||||
from os.path import splitext
|
||||
|
||||
|
||||
class SplitPathComponents(Transformer):
    """Transformer that splits the root value into path components."""

    def __init__(self):
        # 255 is handed unchanged to the base Transformer constructor
        # (presumably an ordering/priority value -- confirm in Transformer).
        Transformer.__init__(self, 255)

    def process(self, mtree, options=None):
        """first split our path into dirs + basename + ext

        The tree is modified in place through ``mtree.split_on_components``
        with ``[ dir*, basename, ext ]``; nothing is returned.

        :param mtree: match tree whose ``value`` holds the path to split.
        :param options: optional mapping; when its ``'name_only'`` entry is
            truthy the value is not treated as a path and is split into
            ``[value, '']`` instead. ``None`` is treated as no options.
        """
        # The signature advertises options=None, so guard against it instead
        # of failing with AttributeError on None.get().
        if options is None:
            options = {}

        if not options.get('name_only'):
            components = fileutils.split_path(mtree.value)
            basename = components.pop(-1)
            components += list(splitext(basename))
            components[-1] = components[-1][1:]  # remove the '.' from the extension

            mtree.split_on_components(components)
        else:
            mtree.split_on_components([mtree.value, ''])
|
36
lib/stevedore/__init__.py
Normal file
36
lib/stevedore/__init__.py
Normal file
|
@ -0,0 +1,36 @@
|
|||
# flake8: noqa
|
||||
|
||||
__all__ = [
|
||||
'ExtensionManager',
|
||||
'EnabledExtensionManager',
|
||||
'NamedExtensionManager',
|
||||
'HookManager',
|
||||
'DriverManager',
|
||||
]
|
||||
|
||||
from .extension import ExtensionManager
|
||||
from .enabled import EnabledExtensionManager
|
||||
from .named import NamedExtensionManager
|
||||
from .hook import HookManager
|
||||
from .driver import DriverManager
|
||||
|
||||
import logging
|
||||
|
||||
# Give the package logger a do-nothing handler, for the case where the
# application using this package never sets up logging itself.
LOG = logging.getLogger('stevedore')

if not hasattr(logging, 'NullHandler'):
    # The logging module in use has no NullHandler: provide a local one.
    class NullHandler(logging.Handler):
        """Handler that discards every record."""

        def handle(self, record):
            pass

        def emit(self, record):
            pass

        def createLock(self):
            self.lock = None

    LOG.addHandler(NullHandler())
else:
    LOG.addHandler(logging.NullHandler())
|
216
lib/stevedore/dispatch.py
Normal file
216
lib/stevedore/dispatch.py
Normal file
|
@ -0,0 +1,216 @@
|
|||
import logging
|
||||
|
||||
from .enabled import EnabledExtensionManager
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DispatchExtensionManager(EnabledExtensionManager):
    """Loads all plugins and filters on execution.

    Intended for long-running processes that need to route different
    inputs to different extensions.

    :param namespace: The namespace for the entry points.
    :type namespace: str
    :param check_func: Function to determine which extensions to load.
    :type check_func: callable
    :param invoke_on_load: Boolean controlling whether to invoke the
        object returned by the entry point after the driver is loaded.
    :type invoke_on_load: bool
    :param invoke_args: Positional arguments to pass when invoking
        the object returned by the entry point. Only used if invoke_on_load
        is True.
    :type invoke_args: tuple
    :param invoke_kwds: Named arguments to pass when invoking
        the object returned by the entry point. Only used if invoke_on_load
        is True.
    :type invoke_kwds: dict
    :param propagate_map_exceptions: Boolean controlling whether exceptions
        are propagated up through the map call or whether they are logged and
        then ignored
    :type propagate_map_exceptions: bool
    """

    def map(self, filter_func, func, *args, **kwds):
        """Call func() on every extension accepted by filter_func().

        Both callables receive the
        :class:`~stevedore.extension.Extension` instance first, followed
        by the extra arguments::

            def filter_func(ext, *args, **kwds):
                pass

            def func(ext, *args, **kwds):
                pass

        func() is invoked only for extensions where filter_func()
        returned a true value.

        Each call goes through ``self._invoke_one_plugin``; the class
        documentation states that exceptions from func() propagate when
        self.propagate_map_exceptions is True and are logged and ignored
        otherwise.

        :param filter_func: Callable to test each extension.
        :param func: Callable to invoke for each extension.
        :param args: Variable arguments to pass to func()
        :param kwds: Keyword arguments to pass to func()
        :returns: List of values returned from func()
        :raises RuntimeError: if the manager holds no extensions.
        """
        if not self.extensions:
            # FIXME: Use a more specific exception class here.
            raise RuntimeError('No %s extensions found' % self.namespace)

        results = []
        # Lazy generator: each extension is filtered right before it is
        # invoked, so filtering and invocation stay interleaved.
        accepted = (ext for ext in self.extensions
                    if filter_func(ext, *args, **kwds))
        for ext in accepted:
            self._invoke_one_plugin(results.append, func, ext, args, kwds)
        return results

    def map_method(self, filter_func, method_name, *args, **kwds):
        """Call the method named `method_name` on every accepted extension.

        Shorthand for :meth:`map` with ``self._call_extension_method`` as
        func; `method_name` and the remaining arguments are forwarded.

        Exception handling is the same as for :meth:`map`.

        .. versionadded:: 0.12

        :param filter_func: Callable to test each extension.
        :param method_name: The extension method name to call
            for each extension.
        :param args: Variable arguments to pass to method
        :param kwds: Keyword arguments to pass to method
        :returns: List of values returned from methods
        """
        caller = self._call_extension_method
        return self.map(filter_func, caller, method_name, *args, **kwds)
|
||||
|
||||
|
||||
class NameDispatchExtensionManager(DispatchExtensionManager):
    """Loads all plugins and filters on execution.

    Intended for long-running processes that need to route different
    inputs to different extensions and know the extension names ahead of
    the call.

    check_func should return ``True`` for an extension that is to be
    loaded and made available, ``False`` for one that is to be ignored.

    :param namespace: The namespace for the entry points.
    :type namespace: str
    :param check_func: Function to determine which extensions to load.
    :type check_func: callable
    :param invoke_on_load: Boolean controlling whether to invoke the
        object returned by the entry point after the driver is loaded.
    :type invoke_on_load: bool
    :param invoke_args: Positional arguments to pass when invoking
        the object returned by the entry point. Only used if invoke_on_load
        is True.
    :type invoke_args: tuple
    :param invoke_kwds: Named arguments to pass when invoking
        the object returned by the entry point. Only used if invoke_on_load
        is True.
    :type invoke_kwds: dict
    :param propagate_map_exceptions: Boolean controlling whether exceptions
        are propagated up through the map call or whether they are logged and
        then ignored
    :type propagate_map_exceptions: bool
    :param on_load_failure_callback: Callback function that will be called when
        a entrypoint can not be loaded. The arguments that will be provided
        when this is called (when an entrypoint fails to load) are
        (manager, entrypoint, exception)
    :type on_load_failure_callback: function
    :param verify_requirements: Use setuptools to enforce the
        dependencies of the plugin(s) being loaded. Defaults to False.
    :type verify_requirements: bool

    """

    def __init__(self, namespace, check_func, invoke_on_load=False,
                 invoke_args=(), invoke_kwds={},
                 propagate_map_exceptions=False,
                 on_load_failure_callback=None,
                 verify_requirements=False):
        # Everything is forwarded by keyword to the parent constructor;
        # invoke_kwds is only passed along here, never modified.
        parent = super(NameDispatchExtensionManager, self)
        parent.__init__(
            namespace=namespace,
            check_func=check_func,
            invoke_on_load=invoke_on_load,
            invoke_args=invoke_args,
            invoke_kwds=invoke_kwds,
            propagate_map_exceptions=propagate_map_exceptions,
            on_load_failure_callback=on_load_failure_callback,
            verify_requirements=verify_requirements,
        )

    def _init_plugins(self, extensions):
        super(NameDispatchExtensionManager, self)._init_plugins(extensions)
        # Name -> extension lookup used by map().
        self.by_name = dict((ext.name, ext) for ext in self.extensions)

    def map(self, names, func, *args, **kwds):
        """Call func() on the extensions whose names are listed in `names`.

        func() should have the signature::

            def func(ext, *args, **kwds):
                pass

        where 'ext' is the :class:`~stevedore.extension.Extension`
        instance.

        Names with no matching extension are skipped with a debug log
        message. Each call goes through ``self._invoke_one_plugin``; the
        class documentation states that exceptions from func() propagate
        when self.propagate_map_exceptions is True and are logged and
        ignored otherwise.

        :param names: List or set of name(s) of extension(s) to invoke.
        :param func: Callable to invoke for each extension.
        :param args: Variable arguments to pass to func()
        :param kwds: Keyword arguments to pass to func()
        :returns: List of values returned from func()
        """
        results = []
        for name in names:
            if name not in self.by_name:
                LOG.debug('Missing extension %r being ignored', name)
                continue
            ext = self.by_name[name]
            self._invoke_one_plugin(results.append, func, ext, args, kwds)
        return results

    def map_method(self, names, method_name, *args, **kwds):
        """Call the method named `method_name` on the named extensions.

        Shorthand for :meth:`map` with ``self._call_extension_method`` as
        func; `method_name` and the remaining arguments are forwarded.

        Exception handling is the same as for :meth:`map`.

        .. versionadded:: 0.12

        :param names: List or set of name(s) of extension(s) to invoke.
        :param method_name: The extension method name
            to call for each extension.
        :param args: Variable arguments to pass to method
        :param kwds: Keyword arguments to pass to method
        :returns: List of values returned from methods
        """
        caller = self._call_extension_method
        return self.map(names, caller, method_name, *args, **kwds)
|
126
lib/stevedore/driver.py
Normal file
126
lib/stevedore/driver.py
Normal file
|
@ -0,0 +1,126 @@
|
|||
from .named import NamedExtensionManager
|
||||
|
||||
|
||||
class DriverManager(NamedExtensionManager):
    """Load a single plugin with a given name from the namespace.

    :param namespace: The namespace for the entry points.
    :type namespace: str
    :param name: The name of the driver to load.
    :type name: str
    :param invoke_on_load: Boolean controlling whether to invoke the
        object returned by the entry point after the driver is loaded.
    :type invoke_on_load: bool
    :param invoke_args: Positional arguments to pass when invoking
        the object returned by the entry point. Only used if invoke_on_load
        is True.
    :type invoke_args: tuple
    :param invoke_kwds: Named arguments to pass when invoking
        the object returned by the entry point. Only used if invoke_on_load
        is True.
    :type invoke_kwds: dict
    :param on_load_failure_callback: Callback function that will be called when
        a entrypoint can not be loaded. The arguments that will be provided
        when this is called (when an entrypoint fails to load) are
        (manager, entrypoint, exception)
    :type on_load_failure_callback: function
    :param verify_requirements: Use setuptools to enforce the
        dependencies of the plugin(s) being loaded. Defaults to False.
    :type verify_requirements: bool
    """

    def __init__(self, namespace, name,
                 invoke_on_load=False, invoke_args=(), invoke_kwds={},
                 on_load_failure_callback=None,
                 verify_requirements=False):
        # The single driver name is wrapped in a one-element list for the
        # parent, which takes a list of names.
        parent = super(DriverManager, self)
        parent.__init__(
            namespace=namespace,
            names=[name],
            invoke_on_load=invoke_on_load,
            invoke_args=invoke_args,
            invoke_kwds=invoke_kwds,
            on_load_failure_callback=on_load_failure_callback,
            verify_requirements=verify_requirements,
        )

    @classmethod
    def make_test_instance(cls, extension, namespace='TESTING',
                           propagate_map_exceptions=False,
                           on_load_failure_callback=None,
                           verify_requirements=False):
        """Construct a test DriverManager

        A test instance works from the extension handed in instead of
        loading anything from entry points.

        :param extension: Pre-configured Extension instance
        :type extension: :class:`~stevedore.extension.Extension`
        :param namespace: The namespace for the manager; used only for
            identification since the extensions are passed in.
        :type namespace: str
        :param propagate_map_exceptions: Boolean controlling whether exceptions
            are propagated up through the map call or whether they are logged
            and then ignored
        :type propagate_map_exceptions: bool
        :param on_load_failure_callback: Callback function that will
            be called when a entrypoint can not be loaded. The
            arguments that will be provided when this is called (when
            an entrypoint fails to load) are (manager, entrypoint,
            exception)
        :type on_load_failure_callback: function
        :param verify_requirements: Use setuptools to enforce the
            dependencies of the plugin(s) being loaded. Defaults to False.
        :type verify_requirements: bool
        :return: The manager instance, initialized for testing

        """
        # The parent factory takes a list of extensions.
        return super(DriverManager, cls).make_test_instance(
            [extension],
            namespace=namespace,
            propagate_map_exceptions=propagate_map_exceptions,
            on_load_failure_callback=on_load_failure_callback,
            verify_requirements=verify_requirements,
        )

    def _init_plugins(self, extensions):
        """Store the extensions and require that exactly one was found.

        :raises RuntimeError: when no extension, or more than one
            extension, is present after the parent initialisation.
        """
        super(DriverManager, self)._init_plugins(extensions)

        if not self.extensions:
            # self._names is not assigned in this class (presumably set by
            # NamedExtensionManager -- confirm there).
            wanted = self._names[0]
            raise RuntimeError('No %r driver found, looking for %r' %
                               (self.namespace, wanted))

        if len(self.extensions) > 1:
            targets = [ext.entry_point_target for ext in self.extensions]
            raise RuntimeError('Multiple %r drivers found: %s' %
                               (self.namespace, ','.join(targets)))

    def __call__(self, func, *args, **kwds):
        """Invokes func() for the single loaded extension.

        func() should have the signature::

            def func(ext, *args, **kwds):
                pass

        where 'ext' is the :class:`~stevedore.extension.Extension`
        instance.

        The call is delegated to ``self.map``, so exceptions raised by
        func() are handled the way ``self.map`` handles them.

        :param func: Callable to invoke for each extension.
        :param args: Variable arguments to pass to func()
        :param kwds: Keyword arguments to pass to func()
        :returns: The first value in the list returned by ``self.map``,
            or None when that list is empty.
        """
        results = self.map(func, *args, **kwds)
        return results[0] if results else None

    @property
    def driver(self):
        """Returns the driver being used by this manager.

        This is the extension's ``obj`` when that is truthy, otherwise
        its ``plugin``.
        """
        # NOTE(review): a falsy-but-valid obj also falls back to plugin.
        ext = self.extensions[0]
        return ext.obj or ext.plugin
|
71
lib/stevedore/enabled.py
Normal file
71
lib/stevedore/enabled.py
Normal file
|
@ -0,0 +1,71 @@
|
|||
import logging
|
||||
|
||||
from .extension import ExtensionManager
|
||||
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class EnabledExtensionManager(ExtensionManager):
    """Loads only plugins that pass a check function.

    check_func should return ``True`` for an extension that is to be
    loaded and made available, ``False`` for one that is to be ignored.

    :param namespace: The namespace for the entry points.
    :type namespace: str
    :param check_func: Function to determine which extensions to load.
    :type check_func: callable
    :param invoke_on_load: Boolean controlling whether to invoke the
        object returned by the entry point after the driver is loaded.
    :type invoke_on_load: bool
    :param invoke_args: Positional arguments to pass when invoking
        the object returned by the entry point. Only used if invoke_on_load
        is True.
    :type invoke_args: tuple
    :param invoke_kwds: Named arguments to pass when invoking
        the object returned by the entry point. Only used if invoke_on_load
        is True.
    :type invoke_kwds: dict
    :param propagate_map_exceptions: Boolean controlling whether exceptions
        are propagated up through the map call or whether they are logged and
        then ignored
    :type propagate_map_exceptions: bool
    :param on_load_failure_callback: Callback function that will be called when
        a entrypoint can not be loaded. The arguments that will be provided
        when this is called (when an entrypoint fails to load) are
        (manager, entrypoint, exception)
    :type on_load_failure_callback: function
    :param verify_requirements: Use setuptools to enforce the
        dependencies of the plugin(s) being loaded. Defaults to False.
    :type verify_requirements: bool

    """

    def __init__(self, namespace, check_func, invoke_on_load=False,
                 invoke_args=(), invoke_kwds={},
                 propagate_map_exceptions=False,
                 on_load_failure_callback=None,
                 verify_requirements=False):
        # check_func is stored before the parent constructor runs; it is
        # read by _load_one_plugin below.
        self.check_func = check_func
        parent = super(EnabledExtensionManager, self)
        parent.__init__(
            namespace,
            invoke_on_load=invoke_on_load,
            invoke_args=invoke_args,
            invoke_kwds=invoke_kwds,
            propagate_map_exceptions=propagate_map_exceptions,
            on_load_failure_callback=on_load_failure_callback,
            verify_requirements=verify_requirements,
        )

    def _load_one_plugin(self, ep, invoke_on_load, invoke_args, invoke_kwds,
                         verify_requirements):
        """Load one entry point and drop it when check_func rejects it.

        :returns: whatever the parent loader returned, unless that value
            is truthy and ``self.check_func`` returns a false value for
            it, in which case None is returned.
        """
        parent = super(EnabledExtensionManager, self)
        ext = parent._load_one_plugin(
            ep, invoke_on_load, invoke_args, invoke_kwds,
            verify_requirements,
        )
        # A falsy load result is handed back untouched and never checked.
        if not ext or self.check_func(ext):
            return ext
        LOG.debug('ignoring extension %r', ep.name)
        return None
|
276
lib/stevedore/extension.py
Normal file
276
lib/stevedore/extension.py
Normal file
|
@ -0,0 +1,276 @@
|
|||
"""ExtensionManager
|
||||
"""
|
||||
|
||||
import pkg_resources
|
||||
|
||||
import logging
|
||||
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Extension(object):
    """Book-keeping object for tracking extensions.

    The arguments passed to the constructor are saved as attributes of
    the instance using the same names, and can be accessed by the
    callables passed to :meth:`map` or when iterating over an
    :class:`ExtensionManager` directly.

    :param name: The entry point name.
    :type name: str
    :param entry_point: The EntryPoint instance returned by
        :mod:`pkg_resources`.
    :type entry_point: EntryPoint
    :param plugin: The value returned by entry_point.load()
    :param obj: The object returned by ``plugin(*args, **kwds)`` if the
        manager invoked the extension on load.

    """

    def __init__(self, name, entry_point, plugin, obj):
        self.name = name
        self.entry_point = entry_point
        self.plugin = plugin
        self.obj = obj

    @property
    def entry_point_target(self):
        """The module and attribute referenced by this extension's entry_point.

        :return: A string representation of the target of the entry point in
            'dotted.module:object' format, or just 'dotted.module' when the
            entry point names a module without any attribute.
        """
        module_name = self.entry_point.module_name
        attrs = self.entry_point.attrs
        if not attrs:
            # Module-only entry point: there is no attribute to index, so
            # report the module alone instead of raising IndexError.
            return module_name
        return '%s:%s' % (module_name, attrs[0])
|
||||
|
||||
|
||||
class ExtensionManager(object):
    """Base class for all of the other managers.

    :param namespace: The namespace for the entry points.
    :type namespace: str
    :param invoke_on_load: Boolean controlling whether to invoke the
        object returned by the entry point after the driver is loaded.
    :type invoke_on_load: bool
    :param invoke_args: Positional arguments to pass when invoking
        the object returned by the entry point. Only used if invoke_on_load
        is True.
    :type invoke_args: tuple
    :param invoke_kwds: Named arguments to pass when invoking
        the object returned by the entry point. Only used if invoke_on_load
        is True.
    :type invoke_kwds: dict
    :param propagate_map_exceptions: Boolean controlling whether exceptions
        are propagated up through the map call or whether they are logged and
        then ignored
    :type propagate_map_exceptions: bool
    :param on_load_failure_callback: Callback function that will be called when
        a entrypoint can not be loaded. The arguments that will be provided
        when this is called (when an entrypoint fails to load) are
        (manager, entrypoint, exception)
    :type on_load_failure_callback: function
    :param verify_requirements: Use setuptools to enforce the
        dependencies of the plugin(s) being loaded. Defaults to False.
    :type verify_requirements: bool
    """

    # Namespace -> list of entry points. Defined on the class, so the same
    # dict is reached through every instance.
    ENTRY_POINT_CACHE = {}

    def __init__(self, namespace,
                 invoke_on_load=False,
                 invoke_args=(),
                 invoke_kwds={},
                 propagate_map_exceptions=False,
                 on_load_failure_callback=None,
                 verify_requirements=False):
        self._init_attributes(
            namespace,
            propagate_map_exceptions=propagate_map_exceptions,
            on_load_failure_callback=on_load_failure_callback)
        loaded = self._load_plugins(invoke_on_load,
                                    invoke_args,
                                    invoke_kwds,
                                    verify_requirements)
        self._init_plugins(loaded)

    @classmethod
    def make_test_instance(cls, extensions, namespace='TESTING',
                           propagate_map_exceptions=False,
                           on_load_failure_callback=None,
                           verify_requirements=False):
        """Construct a test ExtensionManager

        A test instance works from the extensions handed in instead of
        loading anything from entry points.

        :param extensions: Pre-configured Extension instances to use
        :type extensions: list of :class:`~stevedore.extension.Extension`
        :param namespace: The namespace for the manager; used only for
            identification since the extensions are passed in.
        :type namespace: str
        :param propagate_map_exceptions: When calling map, controls whether
            exceptions are propagated up through the map call or whether they
            are logged and then ignored
        :type propagate_map_exceptions: bool
        :param on_load_failure_callback: Callback function that will
            be called when a entrypoint can not be loaded. The
            arguments that will be provided when this is called (when
            an entrypoint fails to load) are (manager, entrypoint,
            exception)
        :type on_load_failure_callback: function
        :param verify_requirements: Use setuptools to enforce the
            dependencies of the plugin(s) being loaded. Defaults to False.
            Accepted but not used by this method.
        :type verify_requirements: bool
        :return: The manager instance, initialized for testing

        """
        # __new__ bypasses __init__, so no entry points are loaded.
        manager = cls.__new__(cls)
        manager._init_attributes(
            namespace,
            propagate_map_exceptions=propagate_map_exceptions,
            on_load_failure_callback=on_load_failure_callback)
        manager._init_plugins(extensions)
        return manager

    def _init_attributes(self, namespace, propagate_map_exceptions=False,
                         on_load_failure_callback=None):
        self._on_load_failure_callback = on_load_failure_callback
        self.propagate_map_exceptions = propagate_map_exceptions
        self.namespace = namespace

    def _init_plugins(self, extensions):
        self.extensions = extensions
        # Name lookup table, filled on first use by __getitem__.
        self._extensions_by_name = None

    def _find_entry_points(self, namespace):
        """Return the entry points of `namespace`, scanning only on a cache miss."""
        cache = self.ENTRY_POINT_CACHE
        if namespace in cache:
            return cache[namespace]
        found = list(pkg_resources.iter_entry_points(namespace))
        cache[namespace] = found
        return found

    def _load_plugins(self, invoke_on_load, invoke_args, invoke_kwds,
                      verify_requirements):
        """Load every entry point of the namespace.

        KeyboardInterrupt and AssertionError are re-raised. Any other
        Exception is passed to the load-failure callback when one was
        given, and logged otherwise; loading then continues with the next
        entry point.

        :returns: list of the truthy results of ``_load_one_plugin``.
        """
        loaded = []
        for ep in self._find_entry_points(self.namespace):
            LOG.debug('found extension %r', ep)
            try:
                ext = self._load_one_plugin(
                    ep,
                    invoke_on_load,
                    invoke_args,
                    invoke_kwds,
                    verify_requirements,
                )
                if ext:
                    loaded.append(ext)
            except (KeyboardInterrupt, AssertionError):
                raise
            except Exception as err:
                if self._on_load_failure_callback is None:
                    LOG.error('Could not load %r: %s', ep.name, err)
                    LOG.exception(err)
                else:
                    self._on_load_failure_callback(self, ep, err)
        return loaded

    def _load_one_plugin(self, ep, invoke_on_load, invoke_args, invoke_kwds,
                         verify_requirements):
        """Load entry point `ep` and wrap the result in an Extension."""
        plugin = ep.load(require=verify_requirements)
        # The plugin is only called when the manager was asked to.
        obj = plugin(*invoke_args, **invoke_kwds) if invoke_on_load else None
        return Extension(ep.name, ep, plugin, obj)

    def names(self):
        "Returns the names of the discovered extensions"
        # Same order as self.extensions, i.e. the order map() walks them
        # in; some subclasses change that order.
        return [ext.name for ext in self.extensions]

    def map(self, func, *args, **kwds):
        """Iterate over the extensions invoking func() for each.

        func() should have the signature::

            def func(ext, *args, **kwds):
                pass

        where 'ext' is the :class:`~stevedore.extension.Extension`
        instance.

        An exception raised by func() propagates and stops processing
        when self.propagate_map_exceptions is True; otherwise it is
        logged and ignored.

        :param func: Callable to invoke for each extension.
        :param args: Variable arguments to pass to func()
        :param kwds: Keyword arguments to pass to func()
        :returns: List of values returned from func()
        :raises RuntimeError: if the manager holds no extensions.
        """
        if not self.extensions:
            # FIXME: Use a more specific exception class here.
            raise RuntimeError('No %s extensions found' % self.namespace)

        results = []
        for ext in self.extensions:
            self._invoke_one_plugin(results.append, func, ext, args, kwds)
        return results

    @staticmethod
    def _call_extension_method(extension, method_name, *args, **kwds):
        # Looks the method up on the extension's obj, not on the plugin.
        method = getattr(extension.obj, method_name)
        return method(*args, **kwds)

    def map_method(self, method_name, *args, **kwds):
        """Iterate over the extensions invoking a method by name.

        Shorthand for :meth:`map` with ``self._call_extension_method`` as
        func; `method_name` and the remaining arguments are forwarded.

        Exception handling is the same as for :meth:`map`.

        .. versionadded:: 0.12

        :param method_name: The extension method name
            to call for each extension.
        :param args: Variable arguments to pass to method
        :param kwds: Keyword arguments to pass to method
        :returns: List of values returned from methods
        """
        caller = self._call_extension_method
        return self.map(caller, method_name, *args, **kwds)

    def _invoke_one_plugin(self, response_callback, func, e, args, kwds):
        """Call func on extension `e` and pass its result to response_callback."""
        try:
            response_callback(func(e, *args, **kwds))
        except Exception as err:
            if self.propagate_map_exceptions:
                raise
            LOG.error('error calling %r: %s', e.name, err)
            LOG.exception(err)

    def __iter__(self):
        """Produce iterator for the manager.

        Iteration yields the :class:`Extension` instances in the order
        of ``self.extensions``.
        """
        return iter(self.extensions)

    def __getitem__(self, name):
        """Return the named extension.

        ``em['name']`` gives the :class:`Extension` instance registered
        under that name; an unknown name raises KeyError.
        """
        if self._extensions_by_name is None:
            # First lookup: build the table once and keep it.
            self._extensions_by_name = dict(
                (ext.name, ext) for ext in self.extensions)
        return self._extensions_by_name[name]
|
64
lib/stevedore/hook.py
Normal file
64
lib/stevedore/hook.py
Normal file
|
@ -0,0 +1,64 @@
|
|||
from .named import NamedExtensionManager
|
||||
|
||||
|
||||
class HookManager(NamedExtensionManager):
    """Coordinate execution of multiple extensions using a common name.

    :param namespace: The namespace for the entry points.
    :type namespace: str
    :param name: The name of the hooks to load.
    :type name: str
    :param invoke_on_load: Boolean controlling whether to invoke the
        object returned by the entry point after the driver is loaded.
    :type invoke_on_load: bool
    :param invoke_args: Positional arguments to pass when invoking
        the object returned by the entry point. Only used if invoke_on_load
        is True.
    :type invoke_args: tuple
    :param invoke_kwds: Named arguments to pass when invoking
        the object returned by the entry point. Only used if invoke_on_load
        is True. ``None`` (the default) means no named arguments.
    :type invoke_kwds: dict
    :param on_load_failure_callback: Callback function that will be called when
        an entrypoint can not be loaded. The arguments that will be provided
        when this is called (when an entrypoint fails to load) are
        (manager, entrypoint, exception)
    :type on_load_failure_callback: function
    :param verify_requirements: Use setuptools to enforce the
        dependencies of the plugin(s) being loaded. Defaults to False.
    :type verify_requirements: bool
    """

    def __init__(self, namespace, name,
                 invoke_on_load=False, invoke_args=(), invoke_kwds=None,
                 on_load_failure_callback=None,
                 verify_requirements=False):
        # Build a fresh dict per call instead of sharing one mutable
        # default object between every instantiation.
        if invoke_kwds is None:
            invoke_kwds = {}
        # The single hook name is wrapped in a list because the parent
        # class works with a list of names.
        super(HookManager, self).__init__(
            namespace,
            [name],
            invoke_on_load=invoke_on_load,
            invoke_args=invoke_args,
            invoke_kwds=invoke_kwds,
            on_load_failure_callback=on_load_failure_callback,
            verify_requirements=verify_requirements,
        )

    def _init_attributes(self, namespace, names, name_order=False,
                         propagate_map_exceptions=False,
                         on_load_failure_callback=None):
        """Initialize the manager state and remember the hook name.

        ``name_order`` is accepted so the signature matches the parent
        class, but it is not forwarded, so the parent keeps its default.
        NOTE(review): presumably because all hooks share one name and
        ordering by name is meaningless here -- confirm before changing.
        """
        super(HookManager, self)._init_attributes(
            namespace, names,
            propagate_map_exceptions=propagate_map_exceptions,
            on_load_failure_callback=on_load_failure_callback)
        self._name = names[0]

    def __getitem__(self, name):
        """Return the named extensions.

        Accessing a HookManager as a dictionary (``em['name']``)
        produces a list of the :class:`Extension` instance(s) with the
        specified name, in the order they would be invoked by map().

        :raises KeyError: If ``name`` is not the hook name this
            manager was created for.
        """
        if name != self._name:
            raise KeyError(name)
        return self.extensions
|
124
lib/stevedore/named.py
Normal file
124
lib/stevedore/named.py
Normal file
|
@ -0,0 +1,124 @@
|
|||
from .extension import ExtensionManager
|
||||
|
||||
|
||||
class NamedExtensionManager(ExtensionManager):
    """Loads only the named extensions.

    This is useful for explicitly enabling extensions in a
    configuration file, for example.

    :param namespace: The namespace for the entry points.
    :type namespace: str
    :param names: The names of the extensions to load.
    :type names: list(str)
    :param invoke_on_load: Boolean controlling whether to invoke the
        object returned by the entry point after the driver is loaded.
    :type invoke_on_load: bool
    :param invoke_args: Positional arguments to pass when invoking
        the object returned by the entry point. Only used if invoke_on_load
        is True.
    :type invoke_args: tuple
    :param invoke_kwds: Named arguments to pass when invoking
        the object returned by the entry point. Only used if invoke_on_load
        is True. ``None`` (the default) means no named arguments.
    :type invoke_kwds: dict
    :param name_order: If true, sort the loaded extensions to match the
        order used in ``names``. Names for which no extension was loaded
        are skipped.
    :type name_order: bool
    :param propagate_map_exceptions: Boolean controlling whether exceptions
        are propagated up through the map call or whether they are logged and
        then ignored
    :type propagate_map_exceptions: bool
    :param on_load_failure_callback: Callback function that will be called when
        an entrypoint can not be loaded. The arguments that will be provided
        when this is called (when an entrypoint fails to load) are
        (manager, entrypoint, exception)
    :type on_load_failure_callback: function
    :param verify_requirements: Use setuptools to enforce the
        dependencies of the plugin(s) being loaded. Defaults to False.
    :type verify_requirements: bool

    """

    def __init__(self, namespace, names,
                 invoke_on_load=False, invoke_args=(), invoke_kwds=None,
                 name_order=False, propagate_map_exceptions=False,
                 on_load_failure_callback=None,
                 verify_requirements=False):
        # Build a fresh dict per call instead of sharing one mutable
        # default object between every instantiation.
        if invoke_kwds is None:
            invoke_kwds = {}
        # Attributes (including the name filter) must be in place before
        # plugins are loaded, because _load_one_plugin reads self._names.
        self._init_attributes(
            namespace, names, name_order=name_order,
            propagate_map_exceptions=propagate_map_exceptions,
            on_load_failure_callback=on_load_failure_callback)
        extensions = self._load_plugins(invoke_on_load,
                                        invoke_args,
                                        invoke_kwds,
                                        verify_requirements)
        self._init_plugins(extensions)

    @classmethod
    def make_test_instance(cls, extensions, namespace='TESTING',
                           propagate_map_exceptions=False,
                           on_load_failure_callback=None,
                           verify_requirements=False):
        """Construct a test NamedExtensionManager

        Test instances are passed a list of extensions to use rather than
        loading them from entry points.

        :param extensions: Pre-configured Extension instances
        :type extensions: list of :class:`~stevedore.extension.Extension`
        :param namespace: The namespace for the manager; used only for
            identification since the extensions are passed in.
        :type namespace: str
        :param propagate_map_exceptions: Boolean controlling whether exceptions
            are propagated up through the map call or whether they are logged
            and then ignored
        :type propagate_map_exceptions: bool
        :param on_load_failure_callback: Callback function that will
            be called when an entrypoint can not be loaded. The
            arguments that will be provided when this is called (when
            an entrypoint fails to load) are (manager, entrypoint,
            exception)
        :type on_load_failure_callback: function
        :param verify_requirements: Use setuptools to enforce the
            dependencies of the plugin(s) being loaded. Defaults to False.
            Accepted but not used by this method, since nothing is
            loaded from entry points here.
        :type verify_requirements: bool
        :return: The manager instance, initialized for testing

        """

        # Bypass __init__ so that no entry points are scanned.
        o = cls.__new__(cls)
        names = [e.name for e in extensions]
        o._init_attributes(namespace, names,
                           propagate_map_exceptions=propagate_map_exceptions,
                           on_load_failure_callback=on_load_failure_callback)
        o._init_plugins(extensions)
        return o

    def _init_attributes(self, namespace, names, name_order=False,
                         propagate_map_exceptions=False,
                         on_load_failure_callback=None):
        """Initialize the base attributes, the name filter and ordering flag."""
        super(NamedExtensionManager, self)._init_attributes(
            namespace, propagate_map_exceptions=propagate_map_exceptions,
            on_load_failure_callback=on_load_failure_callback)

        self._names = names
        self._name_order = name_order

    def _init_plugins(self, extensions):
        """Store the extensions, reordering them by name when requested."""
        super(NamedExtensionManager, self)._init_plugins(extensions)

        if self._name_order:
            # A requested name may have no loaded extension (it was not
            # found or failed to load). Skip such names instead of
            # failing with a KeyError, matching the unordered case where
            # missing names are simply absent.
            loaded = set(e.name for e in self.extensions)
            self.extensions = [self[n] for n in self._names if n in loaded]

    def _load_one_plugin(self, ep, invoke_on_load, invoke_args, invoke_kwds,
                         verify_requirements):
        """Load ``ep`` only if its name was requested; otherwise return None."""
        # Check the name before going any further to prevent
        # undesirable code from being loaded at all if we are not
        # going to use it.
        if ep.name not in self._names:
            return None
        return super(NamedExtensionManager, self)._load_one_plugin(
            ep, invoke_on_load, invoke_args, invoke_kwds,
            verify_requirements,
        )
|
|
@ -3,6 +3,22 @@ import os
|
|||
import subprocess
|
||||
import sys
|
||||
import platform
|
||||
|
||||
# init preliminaries
|
||||
SYS_ARGV = sys.argv[1:]
|
||||
APP_FILENAME = sys.argv[0]
|
||||
APP_NAME = os.path.basename(APP_FILENAME)
|
||||
PROGRAM_DIR = os.path.dirname(os.path.normpath(os.path.abspath(os.path.join(__file__, os.pardir))))
|
||||
LOG_DIR = os.path.join(PROGRAM_DIR, 'logs')
|
||||
LOG_FILE = os.path.join(LOG_DIR, 'postprocess.log')
|
||||
CONFIG_FILE = os.path.join(PROGRAM_DIR, 'autoProcessMedia.cfg')
|
||||
CONFIG_SPEC_FILE = os.path.join(PROGRAM_DIR, 'autoProcessMedia.cfg.spec')
|
||||
CONFIG_MOVIE_FILE = os.path.join(PROGRAM_DIR, 'autoProcessMovie.cfg')
|
||||
CONFIG_TV_FILE = os.path.join(PROGRAM_DIR, 'autoProcessTv.cfg')
|
||||
|
||||
# add our custom libs to the system path
|
||||
sys.path.insert(0, os.path.abspath(os.path.join(PROGRAM_DIR, 'lib')))
|
||||
|
||||
from nzbtomedia import logger, versionCheck
|
||||
from nzbtomedia.nzbToMediaConfig import config
|
||||
from nzbtomedia.nzbToMediaUtil import WakeUp, makeDir
|
||||
|
@ -28,21 +44,9 @@ NZBGET_POSTPROCESS_SUCCESS = 93
|
|||
NZBGET_POSTPROCESS_ERROR = 94
|
||||
NZBGET_POSTPROCESS_NONE = 95
|
||||
|
||||
# config constants
|
||||
CFG = None
|
||||
CFG_LOGGING = None
|
||||
APP_FILENAME = None
|
||||
APP_NAME = None
|
||||
PROGRAM_DIR = None
|
||||
LOG_DIR = None
|
||||
LOG_FILE = None
|
||||
LOG_DEBUG = None
|
||||
CONFIG_FILE = None
|
||||
CONFIG_SPEC_FILE = None
|
||||
CONFIG_MOVIE_FILE = None
|
||||
CONFIG_TV_FILE = None
|
||||
SYS_ENCODING = None
|
||||
SYS_ARGV = None
|
||||
|
||||
AUTO_UPDATE = None
|
||||
NZBTOMEDIA_VERSION = None
|
||||
|
@ -119,8 +123,7 @@ __INITIALIZED__ = False
|
|||
def initialize(section=None):
|
||||
global NZBGET_POSTPROCESS_ERROR, NZBGET_POSTPROCESS_NONE, NZBGET_POSTPROCESS_PARCHECK, NZBGET_POSTPROCESS_SUCCESS, \
|
||||
NZBTOMEDIA_TIMEOUT, FORKS, FORK_DEFAULT, FORK_FAILED_TORRENT, FORK_FAILED, SICKBEARD_TORRENT, SICKBEARD_FAILED, \
|
||||
PROGRAM_DIR, CFG, CFG_LOGGING, CONFIG_FILE, CONFIG_MOVIE_FILE, CONFIG_SPEC_FILE, LOG_DIR, NZBTOMEDIA_BRANCH, \
|
||||
CONFIG_TV_FILE, LOG_FILE, NZBTOMEDIA_VERSION, NEWEST_VERSION, NEWEST_VERSION_STRING, VERSION_NOTIFY, SYS_ARGV, \
|
||||
NZBTOMEDIA_BRANCH, NZBTOMEDIA_VERSION, NEWEST_VERSION, NEWEST_VERSION_STRING, VERSION_NOTIFY, SYS_ARGV, CFG, \
|
||||
SABNZB_NO_OF_ARGUMENTS, SABNZB_0717_NO_OF_ARGUMENTS, CATEGORIES, TORRENT_CLIENTAGENT, USELINK, OUTPUTDIRECTORY, NOFLATTEN, \
|
||||
UTORRENTPWD, UTORRENTUSR, UTORRENTWEBUI, DELUGEHOST, DELUGEPORT, DELUGEUSR, DELUGEPWD, TRANSMISSIONHOST, TRANSMISSIONPORT, \
|
||||
TRANSMISSIONPWD, TRANSMISSIONUSR, COMPRESSEDCONTAINER, MEDIACONTAINER, METACONTAINER, MINSAMPLESIZE, SAMPLEIDS, \
|
||||
|
@ -134,21 +137,6 @@ def initialize(section=None):
|
|||
if __INITIALIZED__:
|
||||
return False
|
||||
|
||||
# init preliminaries
|
||||
SYS_ARGV = sys.argv[1:]
|
||||
APP_FILENAME = sys.argv[0]
|
||||
APP_NAME = os.path.basename(APP_FILENAME)
|
||||
PROGRAM_DIR = os.path.dirname(os.path.normpath(os.path.abspath(os.path.join(__file__, os.pardir))))
|
||||
LOG_DIR = os.path.join(PROGRAM_DIR, 'logs')
|
||||
LOG_FILE = os.path.join(LOG_DIR, 'postprocess.log')
|
||||
CONFIG_FILE = os.path.join(PROGRAM_DIR, 'autoProcessMedia.cfg')
|
||||
CONFIG_SPEC_FILE = os.path.join(PROGRAM_DIR, 'autoProcessMedia.cfg.spec')
|
||||
CONFIG_MOVIE_FILE = os.path.join(PROGRAM_DIR, 'autoProcessMovie.cfg')
|
||||
CONFIG_TV_FILE = os.path.join(PROGRAM_DIR, 'autoProcessTv.cfg')
|
||||
|
||||
# add our custom libs to the system path
|
||||
sys.path.insert(0, os.path.abspath(os.path.join(PROGRAM_DIR, 'lib')))
|
||||
|
||||
try:
|
||||
locale.setlocale(locale.LC_ALL, "")
|
||||
SYS_ENCODING = locale.getpreferredencoding()
|
||||
|
@ -219,6 +207,7 @@ def initialize(section=None):
|
|||
# Set Current Version
|
||||
logger.info('nzbToMedia Version:' + NZBTOMEDIA_VERSION + ' Branch:' + GIT_BRANCH + ' (' + platform.system() + ' ' + platform.release() + ')')
|
||||
|
||||
if int(CFG["WakeOnLan"]["wake"]) == 1:
|
||||
WakeUp()
|
||||
|
||||
NZB_CLIENTAGENT = CFG["Nzb"]["clientAgent"] # sabnzbd
|
||||
|
|
|
@ -8,6 +8,7 @@ import time
|
|||
import nzbtomedia
|
||||
|
||||
from lib import requests
|
||||
from lib import guessit
|
||||
from nzbtomedia.linktastic import linktastic
|
||||
from nzbtomedia import logger
|
||||
from nzbtomedia.synchronousdeluge.client import DelugeClient
|
||||
|
@ -248,10 +249,6 @@ def TestCon(host, port):
|
|||
|
||||
|
||||
def WakeUp():
|
||||
wake = int(nzbtomedia.CFG["WakeOnLan"]["wake"])
|
||||
if wake == 0: # just return if we don't need to wake anything.
|
||||
return
|
||||
logger.info(("Loading WakeOnLan config from %s" % (nzbtomedia.CONFIG_FILE)))
|
||||
host = nzbtomedia.CFG["WakeOnLan"]["host"]
|
||||
port = int(nzbtomedia.CFG["WakeOnLan"]["port"])
|
||||
mac = nzbtomedia.CFG["WakeOnLan"]["mac"]
|
||||
|
@ -646,10 +643,17 @@ def find_imdbid(dirName, nzbName):
|
|||
return imdbid
|
||||
|
||||
logger.info('Searching IMDB for imdbID ...')
|
||||
m = re.search("^(.+)(\d{4})\W", nzbName)
|
||||
if m:
|
||||
title = m.group(1)
|
||||
year = m.group(2)
|
||||
guess = guessit.guess_movie_info(nzbName)
|
||||
if guess:
|
||||
# Movie Title
|
||||
title = None
|
||||
if 'title' in guess:
|
||||
title = guess['title']
|
||||
|
||||
# Movie Year
|
||||
year = None
|
||||
if 'year' in guess:
|
||||
year = guess['year']
|
||||
|
||||
url = "http://www.omdbapi.com"
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue