updated libs to fix guessit and subliminal. Fixes #1080

This commit is contained in:
clinton-hall 2016-09-21 13:31:41 +09:30
parent 319d418af8
commit 0625f7f3c0
263 changed files with 28711 additions and 12615 deletions

.gitignore
View file

@@ -5,6 +5,7 @@
*.log
*.pid
*.db
+*.dbm
/userscripts/
/logs/
/.idea/

View file

@@ -1124,7 +1124,7 @@ def import_subs(filename):
if not core.GETSUBS:
return
try:
-subliminal.cache_region.configure('dogpile.cache.memory')
+subliminal.region.configure('dogpile.cache.dbm', arguments={'filename': 'cachefile.dbm'})
except:
pass
@@ -1139,9 +1139,9 @@ def import_subs(filename):
logger.debug("Attempting to download subtitles for {0}".format(filename), 'SUBTITLES')
try:
-video = subliminal.scan_video(filename, subtitles=True, embedded_subtitles=True)
-subtitles = subliminal.download_best_subtitles({video}, languages, hearing_impaired=False)
-subliminal.save_subtitles(subtitles)
+video = subliminal.scan_video(filename)
+subtitles = subliminal.download_best_subtitles({video}, languages)
+subliminal.save_subtitles(video, subtitles[video])
except Exception as e:
logger.error("Failed to download subtitles for {0} due to: {1}".format(filename, e), 'SUBTITLES')

View file

@@ -0,0 +1,3 @@
from pkgutil import extend_path
__path__ = extend_path(__path__, __name__)

View file

@@ -0,0 +1,18 @@
# Copyright 2009 Brian Quinlan. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""Execute computations asynchronously using threads or processes."""
__author__ = 'Brian Quinlan (brian@sweetapp.com)'
from concurrent.futures._base import (FIRST_COMPLETED,
FIRST_EXCEPTION,
ALL_COMPLETED,
CancelledError,
TimeoutError,
Future,
Executor,
wait,
as_completed)
from concurrent.futures.process import ProcessPoolExecutor
from concurrent.futures.thread import ThreadPoolExecutor
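
The re-exports above give the backport the same public surface as Python 3's concurrent.futures. A minimal usage sketch (the square function is an illustrative stand-in):

# Usage sketch for the backported futures API; works on Python 2 with this
# package installed and unchanged on Python 3.
from __future__ import print_function
from concurrent.futures import ThreadPoolExecutor, as_completed

def square(n):
    return n * n

with ThreadPoolExecutor(max_workers=4) as executor:
    futures = [executor.submit(square, n) for n in range(8)]
    for future in as_completed(futures):
        print(future.result())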

View file

@@ -0,0 +1,574 @@
# Copyright 2009 Brian Quinlan. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
from __future__ import with_statement
import logging
import threading
import time
try:
from collections import namedtuple
except ImportError:
from concurrent.futures._compat import namedtuple
__author__ = 'Brian Quinlan (brian@sweetapp.com)'
FIRST_COMPLETED = 'FIRST_COMPLETED'
FIRST_EXCEPTION = 'FIRST_EXCEPTION'
ALL_COMPLETED = 'ALL_COMPLETED'
_AS_COMPLETED = '_AS_COMPLETED'
# Possible future states (for internal use by the futures package).
PENDING = 'PENDING'
RUNNING = 'RUNNING'
# The future was cancelled by the user...
CANCELLED = 'CANCELLED'
# ...and _Waiter.add_cancelled() was called by a worker.
CANCELLED_AND_NOTIFIED = 'CANCELLED_AND_NOTIFIED'
FINISHED = 'FINISHED'
_FUTURE_STATES = [
PENDING,
RUNNING,
CANCELLED,
CANCELLED_AND_NOTIFIED,
FINISHED
]
_STATE_TO_DESCRIPTION_MAP = {
PENDING: "pending",
RUNNING: "running",
CANCELLED: "cancelled",
CANCELLED_AND_NOTIFIED: "cancelled",
FINISHED: "finished"
}
# Logger for internal use by the futures package.
LOGGER = logging.getLogger("concurrent.futures")
class Error(Exception):
"""Base class for all future-related exceptions."""
pass
class CancelledError(Error):
"""The Future was cancelled."""
pass
class TimeoutError(Error):
"""The operation exceeded the given deadline."""
pass
class _Waiter(object):
"""Provides the event that wait() and as_completed() block on."""
def __init__(self):
self.event = threading.Event()
self.finished_futures = []
def add_result(self, future):
self.finished_futures.append(future)
def add_exception(self, future):
self.finished_futures.append(future)
def add_cancelled(self, future):
self.finished_futures.append(future)
class _AsCompletedWaiter(_Waiter):
"""Used by as_completed()."""
def __init__(self):
super(_AsCompletedWaiter, self).__init__()
self.lock = threading.Lock()
def add_result(self, future):
with self.lock:
super(_AsCompletedWaiter, self).add_result(future)
self.event.set()
def add_exception(self, future):
with self.lock:
super(_AsCompletedWaiter, self).add_exception(future)
self.event.set()
def add_cancelled(self, future):
with self.lock:
super(_AsCompletedWaiter, self).add_cancelled(future)
self.event.set()
class _FirstCompletedWaiter(_Waiter):
"""Used by wait(return_when=FIRST_COMPLETED)."""
def add_result(self, future):
super(_FirstCompletedWaiter, self).add_result(future)
self.event.set()
def add_exception(self, future):
super(_FirstCompletedWaiter, self).add_exception(future)
self.event.set()
def add_cancelled(self, future):
super(_FirstCompletedWaiter, self).add_cancelled(future)
self.event.set()
class _AllCompletedWaiter(_Waiter):
"""Used by wait(return_when=FIRST_EXCEPTION and ALL_COMPLETED)."""
def __init__(self, num_pending_calls, stop_on_exception):
self.num_pending_calls = num_pending_calls
self.stop_on_exception = stop_on_exception
self.lock = threading.Lock()
super(_AllCompletedWaiter, self).__init__()
def _decrement_pending_calls(self):
with self.lock:
self.num_pending_calls -= 1
if not self.num_pending_calls:
self.event.set()
def add_result(self, future):
super(_AllCompletedWaiter, self).add_result(future)
self._decrement_pending_calls()
def add_exception(self, future):
super(_AllCompletedWaiter, self).add_exception(future)
if self.stop_on_exception:
self.event.set()
else:
self._decrement_pending_calls()
def add_cancelled(self, future):
super(_AllCompletedWaiter, self).add_cancelled(future)
self._decrement_pending_calls()
class _AcquireFutures(object):
"""A context manager that does an ordered acquire of Future conditions."""
def __init__(self, futures):
self.futures = sorted(futures, key=id)
def __enter__(self):
for future in self.futures:
future._condition.acquire()
def __exit__(self, *args):
for future in self.futures:
future._condition.release()
def _create_and_install_waiters(fs, return_when):
if return_when == _AS_COMPLETED:
waiter = _AsCompletedWaiter()
elif return_when == FIRST_COMPLETED:
waiter = _FirstCompletedWaiter()
else:
pending_count = sum(
f._state not in [CANCELLED_AND_NOTIFIED, FINISHED] for f in fs)
if return_when == FIRST_EXCEPTION:
waiter = _AllCompletedWaiter(pending_count, stop_on_exception=True)
elif return_when == ALL_COMPLETED:
waiter = _AllCompletedWaiter(pending_count, stop_on_exception=False)
else:
raise ValueError("Invalid return condition: %r" % return_when)
for f in fs:
f._waiters.append(waiter)
return waiter
def as_completed(fs, timeout=None):
"""An iterator over the given futures that yields each as it completes.
Args:
fs: The sequence of Futures (possibly created by different Executors) to
iterate over.
timeout: The maximum number of seconds to wait. If None, then there
is no limit on the wait time.
Returns:
An iterator that yields the given Futures as they complete (finished or
cancelled).
Raises:
TimeoutError: If the entire result iterator could not be generated
before the given timeout.
"""
if timeout is not None:
end_time = timeout + time.time()
with _AcquireFutures(fs):
finished = set(
f for f in fs
if f._state in [CANCELLED_AND_NOTIFIED, FINISHED])
pending = set(fs) - finished
waiter = _create_and_install_waiters(fs, _AS_COMPLETED)
try:
for future in finished:
yield future
while pending:
if timeout is None:
wait_timeout = None
else:
wait_timeout = end_time - time.time()
if wait_timeout < 0:
raise TimeoutError(
'%d (of %d) futures unfinished' % (
len(pending), len(fs)))
waiter.event.wait(wait_timeout)
with waiter.lock:
finished = waiter.finished_futures
waiter.finished_futures = []
waiter.event.clear()
for future in finished:
yield future
pending.remove(future)
finally:
for f in fs:
f._waiters.remove(waiter)
DoneAndNotDoneFutures = namedtuple(
'DoneAndNotDoneFutures', 'done not_done')
def wait(fs, timeout=None, return_when=ALL_COMPLETED):
"""Wait for the futures in the given sequence to complete.
Args:
fs: The sequence of Futures (possibly created by different Executors) to
wait upon.
timeout: The maximum number of seconds to wait. If None, then there
is no limit on the wait time.
return_when: Indicates when this function should return. The options
are:
FIRST_COMPLETED - Return when any future finishes or is
cancelled.
FIRST_EXCEPTION - Return when any future finishes by raising an
exception. If no future raises an exception
then it is equivalent to ALL_COMPLETED.
ALL_COMPLETED - Return when all futures finish or are cancelled.
Returns:
A named 2-tuple of sets. The first set, named 'done', contains the
futures that completed (is finished or cancelled) before the wait
completed. The second set, named 'not_done', contains uncompleted
futures.
"""
with _AcquireFutures(fs):
done = set(f for f in fs
if f._state in [CANCELLED_AND_NOTIFIED, FINISHED])
not_done = set(fs) - done
if (return_when == FIRST_COMPLETED) and done:
return DoneAndNotDoneFutures(done, not_done)
elif (return_when == FIRST_EXCEPTION) and done:
if any(f for f in done
if not f.cancelled() and f.exception() is not None):
return DoneAndNotDoneFutures(done, not_done)
if len(done) == len(fs):
return DoneAndNotDoneFutures(done, not_done)
waiter = _create_and_install_waiters(fs, return_when)
waiter.event.wait(timeout)
for f in fs:
f._waiters.remove(waiter)
done.update(waiter.finished_futures)
return DoneAndNotDoneFutures(done, set(fs) - done)
class Future(object):
"""Represents the result of an asynchronous computation."""
def __init__(self):
"""Initializes the future. Should not be called by clients."""
self._condition = threading.Condition()
self._state = PENDING
self._result = None
self._exception = None
self._waiters = []
self._done_callbacks = []
def _invoke_callbacks(self):
for callback in self._done_callbacks:
try:
callback(self)
except Exception:
LOGGER.exception('exception calling callback for %r', self)
def __repr__(self):
with self._condition:
if self._state == FINISHED:
if self._exception:
return '<Future at %s state=%s raised %s>' % (
hex(id(self)),
_STATE_TO_DESCRIPTION_MAP[self._state],
self._exception.__class__.__name__)
else:
return '<Future at %s state=%s returned %s>' % (
hex(id(self)),
_STATE_TO_DESCRIPTION_MAP[self._state],
self._result.__class__.__name__)
return '<Future at %s state=%s>' % (
hex(id(self)),
_STATE_TO_DESCRIPTION_MAP[self._state])
def cancel(self):
"""Cancel the future if possible.
Returns True if the future was cancelled, False otherwise. A future
cannot be cancelled if it is running or has already completed.
"""
with self._condition:
if self._state in [RUNNING, FINISHED]:
return False
if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
return True
self._state = CANCELLED
self._condition.notify_all()
self._invoke_callbacks()
return True
def cancelled(self):
"""Return True if the future has cancelled."""
with self._condition:
return self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]
def running(self):
"""Return True if the future is currently executing."""
with self._condition:
return self._state == RUNNING
def done(self):
"""Return True of the future was cancelled or finished executing."""
with self._condition:
return self._state in [CANCELLED, CANCELLED_AND_NOTIFIED, FINISHED]
def __get_result(self):
if self._exception:
raise self._exception
else:
return self._result
def add_done_callback(self, fn):
"""Attaches a callable that will be called when the future finishes.
Args:
fn: A callable that will be called with this future as its only
argument when the future completes or is cancelled. The callable
will always be called by a thread in the same process in which
it was added. If the future has already completed or been
cancelled then the callable will be called immediately. These
callables are called in the order that they were added.
"""
with self._condition:
if self._state not in [CANCELLED, CANCELLED_AND_NOTIFIED, FINISHED]:
self._done_callbacks.append(fn)
return
fn(self)
def result(self, timeout=None):
"""Return the result of the call that the future represents.
Args:
timeout: The number of seconds to wait for the result if the future
isn't done. If None, then there is no limit on the wait time.
Returns:
The result of the call that the future represents.
Raises:
CancelledError: If the future was cancelled.
TimeoutError: If the future didn't finish executing before the given
timeout.
Exception: If the call raised then that exception will be raised.
"""
with self._condition:
if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
raise CancelledError()
elif self._state == FINISHED:
return self.__get_result()
self._condition.wait(timeout)
if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
raise CancelledError()
elif self._state == FINISHED:
return self.__get_result()
else:
raise TimeoutError()
def exception(self, timeout=None):
"""Return the exception raised by the call that the future represents.
Args:
timeout: The number of seconds to wait for the exception if the
future isn't done. If None, then there is no limit on the wait
time.
Returns:
The exception raised by the call that the future represents or None
if the call completed without raising.
Raises:
CancelledError: If the future was cancelled.
TimeoutError: If the future didn't finish executing before the given
timeout.
"""
with self._condition:
if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
raise CancelledError()
elif self._state == FINISHED:
return self._exception
self._condition.wait(timeout)
if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
raise CancelledError()
elif self._state == FINISHED:
return self._exception
else:
raise TimeoutError()
# The following methods should only be used by Executors and in tests.
def set_running_or_notify_cancel(self):
"""Mark the future as running or process any cancel notifications.
Should only be used by Executor implementations and unit tests.
If the future has been cancelled (cancel() was called and returned
True) then any threads waiting on the future completing (through calls
to as_completed() or wait()) are notified and False is returned.
If the future was not cancelled then it is put in the running state
(future calls to running() will return True) and True is returned.
This method should be called by Executor implementations before
executing the work associated with this future. If this method returns
False then the work should not be executed.
Returns:
False if the Future was cancelled, True otherwise.
Raises:
RuntimeError: if this method was already called or if set_result()
or set_exception() was called.
"""
with self._condition:
if self._state == CANCELLED:
self._state = CANCELLED_AND_NOTIFIED
for waiter in self._waiters:
waiter.add_cancelled(self)
# self._condition.notify_all() is not necessary because
# self.cancel() triggers a notification.
return False
elif self._state == PENDING:
self._state = RUNNING
return True
else:
LOGGER.critical('Future %s in unexpected state: %s',
id(self),
self._state)
raise RuntimeError('Future in unexpected state')
def set_result(self, result):
"""Sets the return value of work associated with the future.
Should only be used by Executor implementations and unit tests.
"""
with self._condition:
self._result = result
self._state = FINISHED
for waiter in self._waiters:
waiter.add_result(self)
self._condition.notify_all()
self._invoke_callbacks()
def set_exception(self, exception):
"""Sets the result of the future as being the given exception.
Should only be used by Executor implementations and unit tests.
"""
with self._condition:
self._exception = exception
self._state = FINISHED
for waiter in self._waiters:
waiter.add_exception(self)
self._condition.notify_all()
self._invoke_callbacks()
class Executor(object):
"""This is an abstract base class for concrete asynchronous executors."""
def submit(self, fn, *args, **kwargs):
"""Submits a callable to be executed with the given arguments.
Schedules the callable to be executed as fn(*args, **kwargs) and returns
a Future instance representing the execution of the callable.
Returns:
A Future representing the given call.
"""
raise NotImplementedError()
def map(self, fn, *iterables, **kwargs):
"""Returns a iterator equivalent to map(fn, iter).
Args:
fn: A callable that will take as many arguments as there are
passed iterables.
timeout: The maximum number of seconds to wait. If None, then there
is no limit on the wait time.
Returns:
An iterator equivalent to: map(func, *iterables) but the calls may
be evaluated out-of-order.
Raises:
TimeoutError: If the entire result iterator could not be generated
before the given timeout.
Exception: If fn(*args) raises for any values.
"""
timeout = kwargs.get('timeout')
if timeout is not None:
end_time = timeout + time.time()
fs = [self.submit(fn, *args) for args in zip(*iterables)]
try:
for future in fs:
if timeout is None:
yield future.result()
else:
yield future.result(end_time - time.time())
finally:
for future in fs:
future.cancel()
def shutdown(self, wait=True):
"""Clean-up the resources associated with the Executor.
It is safe to call this method several times, but no other
methods can be called after it.
Args:
wait: If True then shutdown will not return until all running
futures have finished executing and the resources used by the
executor have been reclaimed.
"""
pass
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self.shutdown(wait=True)
return False
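
To tie the pieces of _base together, here is a short, hedged sketch exercising Future.add_done_callback() and wait() with FIRST_COMPLETED; the sleep-based workload is made up:

# Sketch of the synchronization primitives defined above.
from __future__ import print_function
import time
from concurrent.futures import ThreadPoolExecutor, wait, FIRST_COMPLETED

def work(delay):
    time.sleep(delay)
    return delay

executor = ThreadPoolExecutor(max_workers=2)
fs = [executor.submit(work, d) for d in (0.1, 0.5)]
fs[0].add_done_callback(lambda f: print('first result:', f.result()))
done, not_done = wait(fs, return_when=FIRST_COMPLETED)
print(len(done), 'done,', len(not_done), 'still pending')
executor.shutdown(wait=True)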

View file

@@ -0,0 +1,101 @@
from keyword import iskeyword as _iskeyword
from operator import itemgetter as _itemgetter
import sys as _sys
def namedtuple(typename, field_names):
"""Returns a new subclass of tuple with named fields.
>>> Point = namedtuple('Point', 'x y')
>>> Point.__doc__ # docstring for the new class
'Point(x, y)'
>>> p = Point(11, y=22) # instantiate with positional args or keywords
>>> p[0] + p[1] # indexable like a plain tuple
33
>>> x, y = p # unpack like a regular tuple
>>> x, y
(11, 22)
>>> p.x + p.y # fields also accessible by name
33
>>> d = p._asdict() # convert to a dictionary
>>> d['x']
11
>>> Point(**d) # convert from a dictionary
Point(x=11, y=22)
>>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Point(x=100, y=22)
"""
# Parse and validate the field names. Validation serves two purposes,
# generating informative error messages and preventing template injection attacks.
if isinstance(field_names, basestring):
field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
field_names = tuple(map(str, field_names))
for name in (typename,) + field_names:
if not all(c.isalnum() or c=='_' for c in name):
raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
if _iskeyword(name):
raise ValueError('Type names and field names cannot be a keyword: %r' % name)
if name[0].isdigit():
raise ValueError('Type names and field names cannot start with a number: %r' % name)
seen_names = set()
for name in field_names:
if name.startswith('_'):
raise ValueError('Field names cannot start with an underscore: %r' % name)
if name in seen_names:
raise ValueError('Encountered duplicate field name: %r' % name)
seen_names.add(name)
# Create and fill-in the class template
numfields = len(field_names)
argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
reprtxt = ', '.join('%s=%%r' % name for name in field_names)
dicttxt = ', '.join('%r: t[%d]' % (name, pos) for pos, name in enumerate(field_names))
template = '''class %(typename)s(tuple):
'%(typename)s(%(argtxt)s)' \n
__slots__ = () \n
_fields = %(field_names)r \n
def __new__(_cls, %(argtxt)s):
return _tuple.__new__(_cls, (%(argtxt)s)) \n
@classmethod
def _make(cls, iterable, new=tuple.__new__, len=len):
'Make a new %(typename)s object from a sequence or iterable'
result = new(cls, iterable)
if len(result) != %(numfields)d:
raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
return result \n
def __repr__(self):
return '%(typename)s(%(reprtxt)s)' %% self \n
def _asdict(t):
'Return a new dict which maps field names to their values'
return {%(dicttxt)s} \n
def _replace(_self, **kwds):
'Return a new %(typename)s object replacing specified fields with new values'
result = _self._make(map(kwds.pop, %(field_names)r, _self))
if kwds:
raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
return result \n
def __getnewargs__(self):
return tuple(self) \n\n''' % locals()
for i, name in enumerate(field_names):
template += ' %s = _property(_itemgetter(%d))\n' % (name, i)
# Execute the template string in a temporary namespace and
# support tracing utilities by setting a value for frame.f_globals['__name__']
namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
_property=property, _tuple=tuple)
try:
exec(template, namespace)
except SyntaxError:
e = _sys.exc_info()[1]
raise SyntaxError(e.message + ':\n' + template)
result = namespace[typename]
# For pickling to work, the __module__ variable needs to be set to the frame
# where the named tuple is created. Bypass this step in environments where
# sys._getframe is not defined (Jython for example).
if hasattr(_sys, '_getframe'):
result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
return result

View file

@@ -0,0 +1,363 @@
# Copyright 2009 Brian Quinlan. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""Implements ProcessPoolExecutor.
The following diagram and text describe the data-flow through the system:
|======================= In-process =====================|== Out-of-process ==|
+----------+     +------------+     +--------+     +-----------+    +---------+
|          | =>  | Work Ids   | =>  |        | =>  | Call Q    | => |         |
|          |     +------------+     |        |     +-----------+    |         |
|          |     | ...        |     |        |     | ...       |    |         |
|          |     | 6          |     |        |     | 5, call() |    |         |
|          |     | 7          |     |        |     | ...       |    |         |
| Process  |     | ...        |     | Local  |     +-----------+    | Process |
|  Pool    |     +------------+     | Worker |                      |  #1..n  |
| Executor |                        | Thread |                      |         |
|          |     +------------+     |        |     +-----------+    |         |
|          | <=> | Work Items | <=> |        | <=  | Result Q  | <= |         |
|          |     +------------+     |        |     +-----------+    |         |
|          |     | 6: call()  |     |        |     | ...       |    |         |
|          |     |    future  |     |        |     | 4, result |    |         |
|          |     | ...        |     |        |     | 3, except |    |         |
+----------+     +------------+     +--------+     +-----------+    +---------+
Executor.submit() called:
- creates a uniquely numbered _WorkItem and adds it to the "Work Items" dict
- adds the id of the _WorkItem to the "Work Ids" queue
Local worker thread:
- reads work ids from the "Work Ids" queue and looks up the corresponding
WorkItem from the "Work Items" dict: if the work item has been cancelled then
it is simply removed from the dict, otherwise it is repackaged as a
_CallItem and put in the "Call Q". New _CallItems are put in the "Call Q"
until "Call Q" is full. NOTE: the size of the "Call Q" is kept small because
calls placed in the "Call Q" can no longer be cancelled with Future.cancel().
- reads _ResultItems from "Result Q", updates the future stored in the
"Work Items" dict and deletes the dict entry
Process #1..n:
- reads _CallItems from "Call Q", executes the calls, and puts the resulting
_ResultItems in "Request Q"
"""
from __future__ import with_statement
import atexit
import multiprocessing
import threading
import weakref
import sys
from concurrent.futures import _base
try:
import queue
except ImportError:
import Queue as queue
__author__ = 'Brian Quinlan (brian@sweetapp.com)'
# Workers are created as daemon threads and processes. This is done to allow the
# interpreter to exit when there are still idle processes in a
# ProcessPoolExecutor's process pool (i.e. shutdown() was not called). However,
# allowing workers to die with the interpreter has two undesirable properties:
# - The workers would still be running during interpreter shutdown,
# meaning that they would fail in unpredictable ways.
# - The workers could be killed while evaluating a work item, which could
# be bad if the callable being evaluated has external side-effects e.g.
# writing to a file.
#
# To work around this problem, an exit handler is installed which tells the
# workers to exit when their work queues are empty and then waits until the
# threads/processes finish.
_threads_queues = weakref.WeakKeyDictionary()
_shutdown = False
def _python_exit():
global _shutdown
_shutdown = True
items = list(_threads_queues.items())
for t, q in items:
q.put(None)
for t, q in items:
t.join()
# Controls how many more calls than processes will be queued in the call queue.
# A smaller number will mean that processes spend more time idle waiting for
# work while a larger number will make Future.cancel() succeed less frequently
# (Futures in the call queue cannot be cancelled).
EXTRA_QUEUED_CALLS = 1
class _WorkItem(object):
def __init__(self, future, fn, args, kwargs):
self.future = future
self.fn = fn
self.args = args
self.kwargs = kwargs
class _ResultItem(object):
def __init__(self, work_id, exception=None, result=None):
self.work_id = work_id
self.exception = exception
self.result = result
class _CallItem(object):
def __init__(self, work_id, fn, args, kwargs):
self.work_id = work_id
self.fn = fn
self.args = args
self.kwargs = kwargs
def _process_worker(call_queue, result_queue):
"""Evaluates calls from call_queue and places the results in result_queue.
This worker is run in a separate process.
Args:
call_queue: A multiprocessing.Queue of _CallItems that will be read and
evaluated by the worker.
result_queue: A multiprocessing.Queue of _ResultItems that will be written
to by the worker.
A None entry in call_queue signals the worker to exit.
"""
while True:
call_item = call_queue.get(block=True)
if call_item is None:
# Wake up queue management thread
result_queue.put(None)
return
try:
r = call_item.fn(*call_item.args, **call_item.kwargs)
except BaseException:
e = sys.exc_info()[1]
result_queue.put(_ResultItem(call_item.work_id,
exception=e))
else:
result_queue.put(_ResultItem(call_item.work_id,
result=r))
def _add_call_item_to_queue(pending_work_items,
work_ids,
call_queue):
"""Fills call_queue with _WorkItems from pending_work_items.
This function never blocks.
Args:
pending_work_items: A dict mapping work ids to _WorkItems e.g.
{5: <_WorkItem...>, 6: <_WorkItem...>, ...}
work_ids: A queue.Queue of work ids e.g. Queue([5, 6, ...]). Work ids
are consumed and the corresponding _WorkItems from
pending_work_items are transformed into _CallItems and put in
call_queue.
call_queue: A multiprocessing.Queue that will be filled with _CallItems
derived from _WorkItems.
"""
while True:
if call_queue.full():
return
try:
work_id = work_ids.get(block=False)
except queue.Empty:
return
else:
work_item = pending_work_items[work_id]
if work_item.future.set_running_or_notify_cancel():
call_queue.put(_CallItem(work_id,
work_item.fn,
work_item.args,
work_item.kwargs),
block=True)
else:
del pending_work_items[work_id]
continue
def _queue_management_worker(executor_reference,
processes,
pending_work_items,
work_ids_queue,
call_queue,
result_queue):
"""Manages the communication between this process and the worker processes.
This function is run in a local thread.
Args:
executor_reference: A weakref.ref to the ProcessPoolExecutor that owns
this thread. Used to determine if the ProcessPoolExecutor has been
garbage collected and that this function can exit.
processes: A list of the multiprocessing.Process instances used as
workers.
pending_work_items: A dict mapping work ids to _WorkItems e.g.
{5: <_WorkItem...>, 6: <_WorkItem...>, ...}
work_ids_queue: A queue.Queue of work ids e.g. Queue([5, 6, ...]).
call_queue: A multiprocessing.Queue that will be filled with _CallItems
derived from _WorkItems for processing by the process workers.
result_queue: A multiprocessing.Queue of _ResultItems generated by the
process workers.
"""
nb_shutdown_processes = [0]
def shutdown_one_process():
"""Tell a worker to terminate, which will in turn wake us again"""
call_queue.put(None)
nb_shutdown_processes[0] += 1
while True:
_add_call_item_to_queue(pending_work_items,
work_ids_queue,
call_queue)
result_item = result_queue.get(block=True)
if result_item is not None:
work_item = pending_work_items[result_item.work_id]
del pending_work_items[result_item.work_id]
if result_item.exception:
work_item.future.set_exception(result_item.exception)
else:
work_item.future.set_result(result_item.result)
# Check whether we should start shutting down.
executor = executor_reference()
# No more work items can be added if:
# - The interpreter is shutting down OR
# - The executor that owns this worker has been collected OR
# - The executor that owns this worker has been shutdown.
if _shutdown or executor is None or executor._shutdown_thread:
# Since no new work items can be added, it is safe to shutdown
# this thread if there are no pending work items.
if not pending_work_items:
while nb_shutdown_processes[0] < len(processes):
shutdown_one_process()
# If .join() is not called on the created processes then
# some multiprocessing.Queue methods may deadlock on Mac OS
# X.
for p in processes:
p.join()
call_queue.close()
return
del executor
_system_limits_checked = False
_system_limited = None
def _check_system_limits():
global _system_limits_checked, _system_limited
if _system_limits_checked:
if _system_limited:
raise NotImplementedError(_system_limited)
_system_limits_checked = True
try:
import os
nsems_max = os.sysconf("SC_SEM_NSEMS_MAX")
except (AttributeError, ValueError):
# sysconf not available or setting not available
return
if nsems_max == -1:
# indeterminate limit, assume that limit is determined
# by available memory only
return
if nsems_max >= 256:
# minimum number of semaphores available
# according to POSIX
return
_system_limited = "system provides too few semaphores (%d available, 256 necessary)" % nsems_max
raise NotImplementedError(_system_limited)
class ProcessPoolExecutor(_base.Executor):
def __init__(self, max_workers=None):
"""Initializes a new ProcessPoolExecutor instance.
Args:
max_workers: The maximum number of processes that can be used to
execute the given calls. If None or not given then as many
worker processes will be created as the machine has processors.
"""
_check_system_limits()
if max_workers is None:
self._max_workers = multiprocessing.cpu_count()
else:
self._max_workers = max_workers
# Make the call queue slightly larger than the number of processes to
# prevent the worker processes from idling. But don't make it too big
# because futures in the call queue cannot be cancelled.
self._call_queue = multiprocessing.Queue(self._max_workers +
EXTRA_QUEUED_CALLS)
self._result_queue = multiprocessing.Queue()
self._work_ids = queue.Queue()
self._queue_management_thread = None
self._processes = set()
# Shutdown is a two-step process.
self._shutdown_thread = False
self._shutdown_lock = threading.Lock()
self._queue_count = 0
self._pending_work_items = {}
def _start_queue_management_thread(self):
# When the executor gets lost, the weakref callback will wake up
# the queue management thread.
def weakref_cb(_, q=self._result_queue):
q.put(None)
if self._queue_management_thread is None:
self._queue_management_thread = threading.Thread(
target=_queue_management_worker,
args=(weakref.ref(self, weakref_cb),
self._processes,
self._pending_work_items,
self._work_ids,
self._call_queue,
self._result_queue))
self._queue_management_thread.daemon = True
self._queue_management_thread.start()
_threads_queues[self._queue_management_thread] = self._result_queue
def _adjust_process_count(self):
for _ in range(len(self._processes), self._max_workers):
p = multiprocessing.Process(
target=_process_worker,
args=(self._call_queue,
self._result_queue))
p.start()
self._processes.add(p)
def submit(self, fn, *args, **kwargs):
with self._shutdown_lock:
if self._shutdown_thread:
raise RuntimeError('cannot schedule new futures after shutdown')
f = _base.Future()
w = _WorkItem(f, fn, args, kwargs)
self._pending_work_items[self._queue_count] = w
self._work_ids.put(self._queue_count)
self._queue_count += 1
# Wake up queue management thread
self._result_queue.put(None)
self._start_queue_management_thread()
self._adjust_process_count()
return f
submit.__doc__ = _base.Executor.submit.__doc__
def shutdown(self, wait=True):
with self._shutdown_lock:
self._shutdown_thread = True
if self._queue_management_thread:
# Wake up queue management thread
self._result_queue.put(None)
if wait:
self._queue_management_thread.join()
# To reduce the risk of opening too many files, remove references to
# objects that use file descriptors.
self._queue_management_thread = None
self._call_queue = None
self._result_queue = None
self._processes = None
shutdown.__doc__ = _base.Executor.shutdown.__doc__
atexit.register(_python_exit)
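
A hedged usage sketch for ProcessPoolExecutor; note the __main__ guard, which matters because worker processes may re-import the calling module (cube and its inputs are illustrative):

# Usage sketch for the process pool defined above.
from __future__ import print_function
from concurrent.futures import ProcessPoolExecutor

def cube(x):
    return x ** 3

if __name__ == '__main__':
    with ProcessPoolExecutor(max_workers=2) as executor:
        print(list(executor.map(cube, range(5))))  # [0, 1, 8, 27, 64]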

View file

@@ -0,0 +1,138 @@
# Copyright 2009 Brian Quinlan. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""Implements ThreadPoolExecutor."""
from __future__ import with_statement
import atexit
import threading
import weakref
import sys
from concurrent.futures import _base
try:
import queue
except ImportError:
import Queue as queue
__author__ = 'Brian Quinlan (brian@sweetapp.com)'
# Workers are created as daemon threads. This is done to allow the interpreter
# to exit when there are still idle threads in a ThreadPoolExecutor's thread
# pool (i.e. shutdown() was not called). However, allowing workers to die with
# the interpreter has two undesirable properties:
# - The workers would still be running during interpreter shutdown,
# meaning that they would fail in unpredictable ways.
# - The workers could be killed while evaluating a work item, which could
# be bad if the callable being evaluated has external side-effects e.g.
# writing to a file.
#
# To work around this problem, an exit handler is installed which tells the
# workers to exit when their work queues are empty and then waits until the
# threads finish.
_threads_queues = weakref.WeakKeyDictionary()
_shutdown = False
def _python_exit():
global _shutdown
_shutdown = True
items = list(_threads_queues.items())
for t, q in items:
q.put(None)
for t, q in items:
t.join()
atexit.register(_python_exit)
class _WorkItem(object):
def __init__(self, future, fn, args, kwargs):
self.future = future
self.fn = fn
self.args = args
self.kwargs = kwargs
def run(self):
if not self.future.set_running_or_notify_cancel():
return
try:
result = self.fn(*self.args, **self.kwargs)
except BaseException:
e = sys.exc_info()[1]
self.future.set_exception(e)
else:
self.future.set_result(result)
def _worker(executor_reference, work_queue):
try:
while True:
work_item = work_queue.get(block=True)
if work_item is not None:
work_item.run()
continue
executor = executor_reference()
# Exit if:
# - The interpreter is shutting down OR
# - The executor that owns the worker has been collected OR
# - The executor that owns the worker has been shutdown.
if _shutdown or executor is None or executor._shutdown:
# Notify other workers
work_queue.put(None)
return
del executor
except BaseException:
_base.LOGGER.critical('Exception in worker', exc_info=True)
class ThreadPoolExecutor(_base.Executor):
def __init__(self, max_workers):
"""Initializes a new ThreadPoolExecutor instance.
Args:
max_workers: The maximum number of threads that can be used to
execute the given calls.
"""
self._max_workers = max_workers
self._work_queue = queue.Queue()
self._threads = set()
self._shutdown = False
self._shutdown_lock = threading.Lock()
def submit(self, fn, *args, **kwargs):
with self._shutdown_lock:
if self._shutdown:
raise RuntimeError('cannot schedule new futures after shutdown')
f = _base.Future()
w = _WorkItem(f, fn, args, kwargs)
self._work_queue.put(w)
self._adjust_thread_count()
return f
submit.__doc__ = _base.Executor.submit.__doc__
def _adjust_thread_count(self):
# When the executor gets lost, the weakref callback will wake up
# the worker threads.
def weakref_cb(_, q=self._work_queue):
q.put(None)
# TODO(bquinlan): Should avoid creating new threads if there are more
# idle threads than items in the work queue.
if len(self._threads) < self._max_workers:
t = threading.Thread(target=_worker,
args=(weakref.ref(self, weakref_cb),
self._work_queue))
t.daemon = True
t.start()
self._threads.add(t)
_threads_queues[t] = self._work_queue
def shutdown(self, wait=True):
with self._shutdown_lock:
self._shutdown = True
self._work_queue.put(None)
if wait:
for t in self._threads:
t.join()
shutdown.__doc__ = _base.Executor.shutdown.__doc__

View file

@@ -0,0 +1,9 @@
"""
Copyright (c) 2003-2010 Gustavo Niemeyer <gustavo@niemeyer.net>
This module offers extensions to the standard python 2.3+
datetime module.
"""
__author__ = "Gustavo Niemeyer <gustavo@niemeyer.net>"
__license__ = "PSF License"
__version__ = "1.5"

libs/dateutil/easter.py
View file

@@ -0,0 +1,92 @@
"""
Copyright (c) 2003-2007 Gustavo Niemeyer <gustavo@niemeyer.net>
This module offers extensions to the standard python 2.3+
datetime module.
"""
__author__ = "Gustavo Niemeyer <gustavo@niemeyer.net>"
__license__ = "PSF License"
import datetime
__all__ = ["easter", "EASTER_JULIAN", "EASTER_ORTHODOX", "EASTER_WESTERN"]
EASTER_JULIAN = 1
EASTER_ORTHODOX = 2
EASTER_WESTERN = 3
def easter(year, method=EASTER_WESTERN):
"""
This method was ported from the work done by GM Arts,
on top of the algorithm by Claus Tondering, which was
based in part on the algorithm of Ouding (1940), as
quoted in "Explanatory Supplement to the Astronomical
Almanac", P. Kenneth Seidelmann, editor.
This algorithm implements three different easter
calculation methods:
1 - Original calculation in Julian calendar, valid in
dates after 326 AD
2 - Original method, with date converted to Gregorian
calendar, valid in years 1583 to 4099
3 - Revised method, in Gregorian calendar, valid in
years 1583 to 4099 as well
These methods are represented by the constants:
EASTER_JULIAN = 1
EASTER_ORTHODOX = 2
EASTER_WESTERN = 3
The default method is method 3.
More about the algorithm may be found at:
http://users.chariot.net.au/~gmarts/eastalg.htm
and
http://www.tondering.dk/claus/calendar.html
"""
if not (1 <= method <= 3):
raise ValueError, "invalid method"
# g - Golden year - 1
# c - Century
# h - (23 - Epact) mod 30
# i - Number of days from March 21 to Paschal Full Moon
# j - Weekday for PFM (0=Sunday, etc)
# p - Number of days from March 21 to Sunday on or before PFM
# (-6 to 28 methods 1 & 3, to 56 for method 2)
# e - Extra days to add for method 2 (converting Julian
# date to Gregorian date)
y = year
g = y % 19
e = 0
if method < 3:
# Old method
i = (19*g+15)%30
j = (y+y//4+i)%7
if method == 2:
# Extra dates to convert Julian to Gregorian date
e = 10
if y > 1600:
e = e+y//100-16-(y//100-16)//4
else:
# New method
c = y//100
h = (c-c//4-(8*c+13)//25+19*g+15)%30
i = h-(h//28)*(1-(h//28)*(29//(h+1))*((21-g)//11))
j = (y+y//4+i+2-c+c//4)%7
# p can be from -6 to 56 corresponding to dates 22 March to 23 May
# (later dates apply to method 2, although 23 May never actually occurs)
p = i-j+e
d = 1+(p+27+(p+6)//40)%31
m = 3+(p+26)//30
return datetime.date(int(y),int(m),int(d))
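
A quick sanity check of the module (dates agree with published Easter tables):

# Illustrative usage of the easter() function defined above.
from dateutil import easter

print(easter.easter(2016))                          # 2016-03-27 (Western)
print(easter.easter(2016, easter.EASTER_ORTHODOX))  # 2016-05-01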

libs/dateutil/parser.py
View file

@@ -0,0 +1,886 @@
# -*- coding:iso-8859-1 -*-
"""
Copyright (c) 2003-2007 Gustavo Niemeyer <gustavo@niemeyer.net>
This module offers extensions to the standard python 2.3+
datetime module.
"""
__author__ = "Gustavo Niemeyer <gustavo@niemeyer.net>"
__license__ = "PSF License"
import datetime
import string
import time
import sys
import os
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
import relativedelta
import tz
__all__ = ["parse", "parserinfo"]
# Some pointers:
#
# http://www.cl.cam.ac.uk/~mgk25/iso-time.html
# http://www.iso.ch/iso/en/prods-services/popstds/datesandtime.html
# http://www.w3.org/TR/NOTE-datetime
# http://ringmaster.arc.nasa.gov/tools/time_formats.html
# http://search.cpan.org/author/MUIR/Time-modules-2003.0211/lib/Time/ParseDate.pm
# http://stein.cshl.org/jade/distrib/docs/java.text.SimpleDateFormat.html
class _timelex(object):
def __init__(self, instream):
if isinstance(instream, basestring):
instream = StringIO(instream)
self.instream = instream
self.wordchars = ('abcdefghijklmnopqrstuvwxyz'
'ABCDEFGHIJKLMNOPQRSTUVWXYZ_'
'ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ')
self.numchars = '0123456789'
self.whitespace = ' \t\r\n'
self.charstack = []
self.tokenstack = []
self.eof = False
def get_token(self):
if self.tokenstack:
return self.tokenstack.pop(0)
seenletters = False
token = None
state = None
wordchars = self.wordchars
numchars = self.numchars
whitespace = self.whitespace
while not self.eof:
if self.charstack:
nextchar = self.charstack.pop(0)
else:
nextchar = self.instream.read(1)
while nextchar == '\x00':
nextchar = self.instream.read(1)
if not nextchar:
self.eof = True
break
elif not state:
token = nextchar
if nextchar in wordchars:
state = 'a'
elif nextchar in numchars:
state = '0'
elif nextchar in whitespace:
token = ' '
break # emit token
else:
break # emit token
elif state == 'a':
seenletters = True
if nextchar in wordchars:
token += nextchar
elif nextchar == '.':
token += nextchar
state = 'a.'
else:
self.charstack.append(nextchar)
break # emit token
elif state == '0':
if nextchar in numchars:
token += nextchar
elif nextchar == '.':
token += nextchar
state = '0.'
else:
self.charstack.append(nextchar)
break # emit token
elif state == 'a.':
seenletters = True
if nextchar == '.' or nextchar in wordchars:
token += nextchar
elif nextchar in numchars and token[-1] == '.':
token += nextchar
state = '0.'
else:
self.charstack.append(nextchar)
break # emit token
elif state == '0.':
if nextchar == '.' or nextchar in numchars:
token += nextchar
elif nextchar in wordchars and token[-1] == '.':
token += nextchar
state = 'a.'
else:
self.charstack.append(nextchar)
break # emit token
if (state in ('a.', '0.') and
(seenletters or token.count('.') > 1 or token[-1] == '.')):
l = token.split('.')
token = l[0]
for tok in l[1:]:
self.tokenstack.append('.')
if tok:
self.tokenstack.append(tok)
return token
def __iter__(self):
return self
def next(self):
token = self.get_token()
if token is None:
raise StopIteration
return token
def split(cls, s):
return list(cls(s))
split = classmethod(split)
class _resultbase(object):
def __init__(self):
for attr in self.__slots__:
setattr(self, attr, None)
def _repr(self, classname):
l = []
for attr in self.__slots__:
value = getattr(self, attr)
if value is not None:
l.append("%s=%s" % (attr, `value`))
return "%s(%s)" % (classname, ", ".join(l))
def __repr__(self):
return self._repr(self.__class__.__name__)
class parserinfo(object):
# m from a.m/p.m, t from ISO T separator
JUMP = [" ", ".", ",", ";", "-", "/", "'",
"at", "on", "and", "ad", "m", "t", "of",
"st", "nd", "rd", "th"]
WEEKDAYS = [("Mon", "Monday"),
("Tue", "Tuesday"),
("Wed", "Wednesday"),
("Thu", "Thursday"),
("Fri", "Friday"),
("Sat", "Saturday"),
("Sun", "Sunday")]
MONTHS = [("Jan", "January"),
("Feb", "February"),
("Mar", "March"),
("Apr", "April"),
("May", "May"),
("Jun", "June"),
("Jul", "July"),
("Aug", "August"),
("Sep", "September"),
("Oct", "October"),
("Nov", "November"),
("Dec", "December")]
HMS = [("h", "hour", "hours"),
("m", "minute", "minutes"),
("s", "second", "seconds")]
AMPM = [("am", "a"),
("pm", "p")]
UTCZONE = ["UTC", "GMT", "Z"]
PERTAIN = ["of"]
TZOFFSET = {}
def __init__(self, dayfirst=False, yearfirst=False):
self._jump = self._convert(self.JUMP)
self._weekdays = self._convert(self.WEEKDAYS)
self._months = self._convert(self.MONTHS)
self._hms = self._convert(self.HMS)
self._ampm = self._convert(self.AMPM)
self._utczone = self._convert(self.UTCZONE)
self._pertain = self._convert(self.PERTAIN)
self.dayfirst = dayfirst
self.yearfirst = yearfirst
self._year = time.localtime().tm_year
self._century = self._year//100*100
def _convert(self, lst):
dct = {}
for i in range(len(lst)):
v = lst[i]
if isinstance(v, tuple):
for v in v:
dct[v.lower()] = i
else:
dct[v.lower()] = i
return dct
def jump(self, name):
return name.lower() in self._jump
def weekday(self, name):
if len(name) >= 3:
try:
return self._weekdays[name.lower()]
except KeyError:
pass
return None
def month(self, name):
if len(name) >= 3:
try:
return self._months[name.lower()]+1
except KeyError:
pass
return None
def hms(self, name):
try:
return self._hms[name.lower()]
except KeyError:
return None
def ampm(self, name):
try:
return self._ampm[name.lower()]
except KeyError:
return None
def pertain(self, name):
return name.lower() in self._pertain
def utczone(self, name):
return name.lower() in self._utczone
def tzoffset(self, name):
if name in self._utczone:
return 0
return self.TZOFFSET.get(name)
def convertyear(self, year):
if year < 100:
year += self._century
if abs(year-self._year) >= 50:
if year < self._year:
year += 100
else:
year -= 100
return year
def validate(self, res):
# move to info
if res.year is not None:
res.year = self.convertyear(res.year)
if res.tzoffset == 0 and not res.tzname or res.tzname == 'Z':
res.tzname = "UTC"
res.tzoffset = 0
elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
res.tzoffset = 0
return True
class parser(object):
def __init__(self, info=None):
self.info = info or parserinfo()
def parse(self, timestr, default=None,
ignoretz=False, tzinfos=None,
**kwargs):
if not default:
default = datetime.datetime.now().replace(hour=0, minute=0,
second=0, microsecond=0)
res = self._parse(timestr, **kwargs)
if res is None:
raise ValueError, "unknown string format"
repl = {}
for attr in ["year", "month", "day", "hour",
"minute", "second", "microsecond"]:
value = getattr(res, attr)
if value is not None:
repl[attr] = value
ret = default.replace(**repl)
if res.weekday is not None and not res.day:
ret = ret+relativedelta.relativedelta(weekday=res.weekday)
if not ignoretz:
if callable(tzinfos) or tzinfos and res.tzname in tzinfos:
if callable(tzinfos):
tzdata = tzinfos(res.tzname, res.tzoffset)
else:
tzdata = tzinfos.get(res.tzname)
if isinstance(tzdata, datetime.tzinfo):
tzinfo = tzdata
elif isinstance(tzdata, basestring):
tzinfo = tz.tzstr(tzdata)
elif isinstance(tzdata, int):
tzinfo = tz.tzoffset(res.tzname, tzdata)
else:
raise ValueError, "offset must be tzinfo subclass, " \
"tz string, or int offset"
ret = ret.replace(tzinfo=tzinfo)
elif res.tzname and res.tzname in time.tzname:
ret = ret.replace(tzinfo=tz.tzlocal())
elif res.tzoffset == 0:
ret = ret.replace(tzinfo=tz.tzutc())
elif res.tzoffset:
ret = ret.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset))
return ret
class _result(_resultbase):
__slots__ = ["year", "month", "day", "weekday",
"hour", "minute", "second", "microsecond",
"tzname", "tzoffset"]
def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False):
info = self.info
if dayfirst is None:
dayfirst = info.dayfirst
if yearfirst is None:
yearfirst = info.yearfirst
res = self._result()
l = _timelex.split(timestr)
try:
# year/month/day list
ymd = []
# Index of the month string in ymd
mstridx = -1
len_l = len(l)
i = 0
while i < len_l:
# Check if it's a number
try:
value_repr = l[i]
value = float(value_repr)
except ValueError:
value = None
if value is not None:
# Token is a number
len_li = len(l[i])
i += 1
if (len(ymd) == 3 and len_li in (2, 4)
and (i >= len_l or (l[i] != ':' and
info.hms(l[i]) is None))):
# 19990101T23[59]
s = l[i-1]
res.hour = int(s[:2])
if len_li == 4:
res.minute = int(s[2:])
elif len_li == 6 or (len_li > 6 and l[i-1].find('.') == 6):
# YYMMDD or HHMMSS[.ss]
s = l[i-1]
if not ymd and l[i-1].find('.') == -1:
ymd.append(info.convertyear(int(s[:2])))
ymd.append(int(s[2:4]))
ymd.append(int(s[4:]))
else:
# 19990101T235959[.59]
res.hour = int(s[:2])
res.minute = int(s[2:4])
res.second, res.microsecond = _parsems(s[4:])
elif len_li == 8:
# YYYYMMDD
s = l[i-1]
ymd.append(int(s[:4]))
ymd.append(int(s[4:6]))
ymd.append(int(s[6:]))
elif len_li in (12, 14):
# YYYYMMDDhhmm[ss]
s = l[i-1]
ymd.append(int(s[:4]))
ymd.append(int(s[4:6]))
ymd.append(int(s[6:8]))
res.hour = int(s[8:10])
res.minute = int(s[10:12])
if len_li == 14:
res.second = int(s[12:])
elif ((i < len_l and info.hms(l[i]) is not None) or
(i+1 < len_l and l[i] == ' ' and
info.hms(l[i+1]) is not None)):
# HH[ ]h or MM[ ]m or SS[.ss][ ]s
if l[i] == ' ':
i += 1
idx = info.hms(l[i])
while True:
if idx == 0:
res.hour = int(value)
if value%1:
res.minute = int(60*(value%1))
elif idx == 1:
res.minute = int(value)
if value%1:
res.second = int(60*(value%1))
elif idx == 2:
res.second, res.microsecond = \
_parsems(value_repr)
i += 1
if i >= len_l or idx == 2:
break
# 12h00
try:
value_repr = l[i]
value = float(value_repr)
except ValueError:
break
else:
i += 1
idx += 1
if i < len_l:
newidx = info.hms(l[i])
if newidx is not None:
idx = newidx
elif i+1 < len_l and l[i] == ':':
# HH:MM[:SS[.ss]]
res.hour = int(value)
i += 1
value = float(l[i])
res.minute = int(value)
if value%1:
res.second = int(60*(value%1))
i += 1
if i < len_l and l[i] == ':':
res.second, res.microsecond = _parsems(l[i+1])
i += 2
elif i < len_l and l[i] in ('-', '/', '.'):
sep = l[i]
ymd.append(int(value))
i += 1
if i < len_l and not info.jump(l[i]):
try:
# 01-01[-01]
ymd.append(int(l[i]))
except ValueError:
# 01-Jan[-01]
value = info.month(l[i])
if value is not None:
ymd.append(value)
assert mstridx == -1
mstridx = len(ymd)-1
else:
return None
i += 1
if i < len_l and l[i] == sep:
# We have three members
i += 1
value = info.month(l[i])
if value is not None:
ymd.append(value)
assert mstridx == -1
mstridx = len(ymd)-1
else:
ymd.append(int(l[i]))
i += 1
elif i >= len_l or info.jump(l[i]):
if i+1 < len_l and info.ampm(l[i+1]) is not None:
# 12 am
res.hour = int(value)
if res.hour < 12 and info.ampm(l[i+1]) == 1:
res.hour += 12
elif res.hour == 12 and info.ampm(l[i+1]) == 0:
res.hour = 0
i += 1
else:
# Year, month or day
ymd.append(int(value))
i += 1
elif info.ampm(l[i]) is not None:
# 12am
res.hour = int(value)
if res.hour < 12 and info.ampm(l[i]) == 1:
res.hour += 12
elif res.hour == 12 and info.ampm(l[i]) == 0:
res.hour = 0
i += 1
elif not fuzzy:
return None
else:
i += 1
continue
# Check weekday
value = info.weekday(l[i])
if value is not None:
res.weekday = value
i += 1
continue
# Check month name
value = info.month(l[i])
if value is not None:
ymd.append(value)
assert mstridx == -1
mstridx = len(ymd)-1
i += 1
if i < len_l:
if l[i] in ('-', '/'):
# Jan-01[-99]
sep = l[i]
i += 1
ymd.append(int(l[i]))
i += 1
if i < len_l and l[i] == sep:
# Jan-01-99
i += 1
ymd.append(int(l[i]))
i += 1
elif (i+3 < len_l and l[i] == l[i+2] == ' '
and info.pertain(l[i+1])):
# Jan of 01
# In this case, 01 is clearly year
try:
value = int(l[i+3])
except ValueError:
# Wrong guess
pass
else:
# Convert it here to become unambiguous
ymd.append(info.convertyear(value))
i += 4
continue
# Check am/pm
value = info.ampm(l[i])
if value is not None:
if value == 1 and res.hour < 12:
res.hour += 12
elif value == 0 and res.hour == 12:
res.hour = 0
i += 1
continue
# Check for a timezone name
if (res.hour is not None and len(l[i]) <= 5 and
res.tzname is None and res.tzoffset is None and
not [x for x in l[i] if x not in string.ascii_uppercase]):
res.tzname = l[i]
res.tzoffset = info.tzoffset(res.tzname)
i += 1
# Check for something like GMT+3, or BRST+3. Notice
# that it doesn't mean "I am 3 hours after GMT", but
# "my time +3 is GMT". If found, we reverse the
# logic so that timezone parsing code will get it
# right.
if i < len_l and l[i] in ('+', '-'):
l[i] = ('+', '-')[l[i] == '+']
res.tzoffset = None
if info.utczone(res.tzname):
# With something like GMT+3, the timezone
# is *not* GMT.
res.tzname = None
continue
# Check for a numbered timezone
if res.hour is not None and l[i] in ('+', '-'):
signal = (-1,1)[l[i] == '+']
i += 1
len_li = len(l[i])
if len_li == 4:
# -0300
res.tzoffset = int(l[i][:2])*3600+int(l[i][2:])*60
elif i+1 < len_l and l[i+1] == ':':
# -03:00
res.tzoffset = int(l[i])*3600+int(l[i+2])*60
i += 2
elif len_li <= 2:
# -[0]3
res.tzoffset = int(l[i][:2])*3600
else:
return None
i += 1
res.tzoffset *= signal
# Look for a timezone name between parenthesis
if (i+3 < len_l and
info.jump(l[i]) and l[i+1] == '(' and l[i+3] == ')' and
3 <= len(l[i+2]) <= 5 and
not [x for x in l[i+2]
if x not in string.ascii_uppercase]):
# -0300 (BRST)
res.tzname = l[i+2]
i += 4
continue
# Check jumps
if not (info.jump(l[i]) or fuzzy):
return None
i += 1
# Process year/month/day
len_ymd = len(ymd)
if len_ymd > 3:
# More than three members!?
return None
elif len_ymd == 1 or (mstridx != -1 and len_ymd == 2):
# One member, or two members with a month string
if mstridx != -1:
res.month = ymd[mstridx]
del ymd[mstridx]
if len_ymd > 1 or mstridx == -1:
if ymd[0] > 31:
res.year = ymd[0]
else:
res.day = ymd[0]
elif len_ymd == 2:
# Two members with numbers
if ymd[0] > 31:
# 99-01
res.year, res.month = ymd
elif ymd[1] > 31:
# 01-99
res.month, res.year = ymd
elif dayfirst and ymd[1] <= 12:
# 13-01
res.day, res.month = ymd
else:
# 01-13
res.month, res.day = ymd
if len_ymd == 3:
# Three members
if mstridx == 0:
res.month, res.day, res.year = ymd
elif mstridx == 1:
if ymd[0] > 31 or (yearfirst and ymd[2] <= 31):
# 99-Jan-01
res.year, res.month, res.day = ymd
else:
# 01-Jan-01
# Give precedence to day-first, since
# two-digit years are usually hand-written.
res.day, res.month, res.year = ymd
elif mstridx == 2:
# WTF!?
if ymd[1] > 31:
# 01-99-Jan
res.day, res.year, res.month = ymd
else:
# 99-01-Jan
res.year, res.day, res.month = ymd
else:
if ymd[0] > 31 or \
(yearfirst and ymd[1] <= 12 and ymd[2] <= 31):
# 99-01-01
res.year, res.month, res.day = ymd
elif ymd[0] > 12 or (dayfirst and ymd[1] <= 12):
# 13-01-01
res.day, res.month, res.year = ymd
else:
# 01-13-01
res.month, res.day, res.year = ymd
except (IndexError, ValueError, AssertionError):
return None
if not info.validate(res):
return None
return res
DEFAULTPARSER = parser()
def parse(timestr, parserinfo=None, **kwargs):
if parserinfo:
return parser(parserinfo).parse(timestr, **kwargs)
else:
return DEFAULTPARSER.parse(timestr, **kwargs)
class _tzparser(object):
class _result(_resultbase):
__slots__ = ["stdabbr", "stdoffset", "dstabbr", "dstoffset",
"start", "end"]
class _attr(_resultbase):
__slots__ = ["month", "week", "weekday",
"yday", "jyday", "day", "time"]
def __repr__(self):
return self._repr("")
def __init__(self):
_resultbase.__init__(self)
self.start = self._attr()
self.end = self._attr()
def parse(self, tzstr):
res = self._result()
l = _timelex.split(tzstr)
try:
len_l = len(l)
i = 0
while i < len_l:
# BRST+3[BRDT[+2]]
j = i
while j < len_l and not [x for x in l[j]
if x in "0123456789:,-+"]:
j += 1
if j != i:
if not res.stdabbr:
offattr = "stdoffset"
res.stdabbr = "".join(l[i:j])
else:
offattr = "dstoffset"
res.dstabbr = "".join(l[i:j])
i = j
if (i < len_l and
(l[i] in ('+', '-') or l[i][0] in "0123456789")):
if l[i] in ('+', '-'):
# Yes, that's right. See the TZ variable
# documentation.
signal = (1,-1)[l[i] == '+']
i += 1
else:
signal = -1
len_li = len(l[i])
if len_li == 4:
# -0300
setattr(res, offattr,
(int(l[i][:2])*3600+int(l[i][2:])*60)*signal)
elif i+1 < len_l and l[i+1] == ':':
# -03:00
setattr(res, offattr,
(int(l[i])*3600+int(l[i+2])*60)*signal)
i += 2
elif len_li <= 2:
# -[0]3
setattr(res, offattr,
int(l[i][:2])*3600*signal)
else:
return None
i += 1
if res.dstabbr:
break
else:
break
if i < len_l:
for j in range(i, len_l):
if l[j] == ';': l[j] = ','
assert l[i] == ','
i += 1
if i >= len_l:
pass
elif (8 <= l.count(',') <= 9 and
not [y for x in l[i:] if x != ','
for y in x if y not in "0123456789"]):
# GMT0BST,3,0,30,3600,10,0,26,7200[,3600]
for x in (res.start, res.end):
x.month = int(l[i])
i += 2
if l[i] == '-':
value = int(l[i+1])*-1
i += 1
else:
value = int(l[i])
i += 2
if value:
x.week = value
x.weekday = (int(l[i])-1)%7
else:
x.day = int(l[i])
i += 2
x.time = int(l[i])
i += 2
if i < len_l:
if l[i] in ('-','+'):
signal = (-1,1)[l[i] == "+"]
i += 1
else:
signal = 1
res.dstoffset = (res.stdoffset+int(l[i]))*signal
elif (l.count(',') == 2 and l[i:].count('/') <= 2 and
not [y for x in l[i:] if x not in (',','/','J','M',
'.','-',':')
for y in x if y not in "0123456789"]):
for x in (res.start, res.end):
if l[i] == 'J':
# non-leap year day (1 based)
i += 1
x.jyday = int(l[i])
elif l[i] == 'M':
# month[-.]week[-.]weekday
i += 1
x.month = int(l[i])
i += 1
assert l[i] in ('-', '.')
i += 1
x.week = int(l[i])
if x.week == 5:
x.week = -1
i += 1
assert l[i] in ('-', '.')
i += 1
x.weekday = (int(l[i])-1)%7
else:
# year day (zero based)
x.yday = int(l[i])+1
i += 1
if i < len_l and l[i] == '/':
i += 1
# start time
len_li = len(l[i])
if len_li == 4:
# -0300
x.time = (int(l[i][:2])*3600+int(l[i][2:])*60)
elif i+1 < len_l and l[i+1] == ':':
# -03:00
x.time = int(l[i])*3600+int(l[i+2])*60
i += 2
if i+1 < len_l and l[i+1] == ':':
i += 2
x.time += int(l[i])
elif len_li <= 2:
# -[0]3
x.time = (int(l[i][:2])*3600)
else:
return None
i += 1
assert i == len_l or l[i] == ','
i += 1
assert i >= len_l
except (IndexError, ValueError, AssertionError):
return None
return res
DEFAULTTZPARSER = _tzparser()
def _parsetz(tzstr):
return DEFAULTTZPARSER.parse(tzstr)
def _parsems(value):
"""Parse a I[.F] seconds value into (seconds, microseconds)."""
if "." not in value:
return int(value), 0
else:
i, f = value.split(".")
return int(i), int(f.ljust(6, "0")[:6])
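# Illustrative example: _parsems("21.5") == (21, 500000), while
# _parsems("21") == (21, 0).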
# vim:ts=4:sw=4:et

432
libs/dateutil/relativedelta.py Normal file
View file

@ -0,0 +1,432 @@
"""
Copyright (c) 2003-2010 Gustavo Niemeyer <gustavo@niemeyer.net>
This module offers extensions to the standard python 2.3+
datetime module.
"""
__author__ = "Gustavo Niemeyer <gustavo@niemeyer.net>"
__license__ = "PSF License"
import datetime
import calendar
__all__ = ["relativedelta", "MO", "TU", "WE", "TH", "FR", "SA", "SU"]
class weekday(object):
__slots__ = ["weekday", "n"]
def __init__(self, weekday, n=None):
self.weekday = weekday
self.n = n
def __call__(self, n):
if n == self.n:
return self
else:
return self.__class__(self.weekday, n)
def __eq__(self, other):
try:
if self.weekday != other.weekday or self.n != other.n:
return False
except AttributeError:
return False
return True
def __repr__(self):
s = ("MO", "TU", "WE", "TH", "FR", "SA", "SU")[self.weekday]
if not self.n:
return s
else:
return "%s(%+d)" % (s, self.n)
MO, TU, WE, TH, FR, SA, SU = weekdays = tuple([weekday(x) for x in range(7)])
class relativedelta:
"""
The relativedelta type is based on the specification of the excellent
work done by M.-A. Lemburg in his mx.DateTime extension. However,
notice that this type does *NOT* implement the same algorithm as
his work. Do *NOT* expect it to behave like mx.DateTime's counterpart.
There are two different ways to build a relativedelta instance. The
first one is passing it two date/datetime classes:
relativedelta(datetime1, datetime2)
And the other way is to use the following keyword arguments:
year, month, day, hour, minute, second, microsecond:
Absolute information.
years, months, weeks, days, hours, minutes, seconds, microseconds:
Relative information, may be negative.
weekday:
One of the weekday instances (MO, TU, etc). These instances may
receive a parameter N, specifying the Nth weekday, which could
    be positive or negative (like MO(+1) or MO(-2)). Not specifying
it is the same as specifying +1. You can also use an integer,
where 0=MO.
leapdays:
Will add given days to the date found, if year is a leap
    year, and the date found is past February 28.
yearday, nlyearday:
Set the yearday or the non-leap year day (jump leap days).
These are converted to day/month/leapdays information.
Here is the behavior of operations with relativedelta:
1) Calculate the absolute year, using the 'year' argument, or the
original datetime year, if the argument is not present.
2) Add the relative 'years' argument to the absolute year.
3) Do steps 1 and 2 for month/months.
4) Calculate the absolute day, using the 'day' argument, or the
original datetime day, if the argument is not present. Then,
subtract from the day until it fits in the year and month
found after their operations.
5) Add the relative 'days' argument to the absolute day. Notice
that the 'weeks' argument is multiplied by 7 and added to
'days'.
6) Do steps 1 and 2 for hour/hours, minute/minutes, second/seconds,
microsecond/microseconds.
7) If the 'weekday' argument is present, calculate the weekday,
with the given (wday, nth) tuple. wday is the index of the
weekday (0-6, 0=Mon), and nth is the number of weeks to add
   forward or backward, depending on its sign. Notice that if
the calculated date is already Monday, for example, using
(0, 1) or (0, -1) won't change the day.
"""
def __init__(self, dt1=None, dt2=None,
years=0, months=0, days=0, leapdays=0, weeks=0,
hours=0, minutes=0, seconds=0, microseconds=0,
year=None, month=None, day=None, weekday=None,
yearday=None, nlyearday=None,
hour=None, minute=None, second=None, microsecond=None):
if dt1 and dt2:
if not isinstance(dt1, datetime.date) or \
not isinstance(dt2, datetime.date):
raise TypeError, "relativedelta only diffs datetime/date"
if type(dt1) is not type(dt2):
if not isinstance(dt1, datetime.datetime):
dt1 = datetime.datetime.fromordinal(dt1.toordinal())
elif not isinstance(dt2, datetime.datetime):
dt2 = datetime.datetime.fromordinal(dt2.toordinal())
self.years = 0
self.months = 0
self.days = 0
self.leapdays = 0
self.hours = 0
self.minutes = 0
self.seconds = 0
self.microseconds = 0
self.year = None
self.month = None
self.day = None
self.weekday = None
self.hour = None
self.minute = None
self.second = None
self.microsecond = None
self._has_time = 0
months = (dt1.year*12+dt1.month)-(dt2.year*12+dt2.month)
self._set_months(months)
dtm = self.__radd__(dt2)
if dt1 < dt2:
while dt1 > dtm:
months += 1
self._set_months(months)
dtm = self.__radd__(dt2)
else:
while dt1 < dtm:
months -= 1
self._set_months(months)
dtm = self.__radd__(dt2)
delta = dt1 - dtm
self.seconds = delta.seconds+delta.days*86400
self.microseconds = delta.microseconds
else:
self.years = years
self.months = months
self.days = days+weeks*7
self.leapdays = leapdays
self.hours = hours
self.minutes = minutes
self.seconds = seconds
self.microseconds = microseconds
self.year = year
self.month = month
self.day = day
self.hour = hour
self.minute = minute
self.second = second
self.microsecond = microsecond
if type(weekday) is int:
self.weekday = weekdays[weekday]
else:
self.weekday = weekday
yday = 0
if nlyearday:
yday = nlyearday
elif yearday:
yday = yearday
if yearday > 59:
self.leapdays = -1
if yday:
ydayidx = [31,59,90,120,151,181,212,243,273,304,334,366]
for idx, ydays in enumerate(ydayidx):
if yday <= ydays:
self.month = idx+1
if idx == 0:
self.day = yday
else:
self.day = yday-ydayidx[idx-1]
break
else:
raise ValueError, "invalid year day (%d)" % yday
self._fix()
def _fix(self):
if abs(self.microseconds) > 999999:
s = self.microseconds//abs(self.microseconds)
div, mod = divmod(self.microseconds*s, 1000000)
self.microseconds = mod*s
self.seconds += div*s
if abs(self.seconds) > 59:
s = self.seconds//abs(self.seconds)
div, mod = divmod(self.seconds*s, 60)
self.seconds = mod*s
self.minutes += div*s
if abs(self.minutes) > 59:
s = self.minutes//abs(self.minutes)
div, mod = divmod(self.minutes*s, 60)
self.minutes = mod*s
self.hours += div*s
if abs(self.hours) > 23:
s = self.hours//abs(self.hours)
div, mod = divmod(self.hours*s, 24)
self.hours = mod*s
self.days += div*s
if abs(self.months) > 11:
s = self.months//abs(self.months)
div, mod = divmod(self.months*s, 12)
self.months = mod*s
self.years += div*s
if (self.hours or self.minutes or self.seconds or self.microseconds or
self.hour is not None or self.minute is not None or
self.second is not None or self.microsecond is not None):
self._has_time = 1
else:
self._has_time = 0
def _set_months(self, months):
self.months = months
if abs(self.months) > 11:
s = self.months//abs(self.months)
div, mod = divmod(self.months*s, 12)
self.months = mod*s
self.years = div*s
else:
self.years = 0
def __radd__(self, other):
if not isinstance(other, datetime.date):
raise TypeError, "unsupported type for add operation"
elif self._has_time and not isinstance(other, datetime.datetime):
other = datetime.datetime.fromordinal(other.toordinal())
year = (self.year or other.year)+self.years
month = self.month or other.month
if self.months:
assert 1 <= abs(self.months) <= 12
month += self.months
if month > 12:
year += 1
month -= 12
elif month < 1:
year -= 1
month += 12
day = min(calendar.monthrange(year, month)[1],
self.day or other.day)
repl = {"year": year, "month": month, "day": day}
for attr in ["hour", "minute", "second", "microsecond"]:
value = getattr(self, attr)
if value is not None:
repl[attr] = value
days = self.days
if self.leapdays and month > 2 and calendar.isleap(year):
days += self.leapdays
ret = (other.replace(**repl)
+ datetime.timedelta(days=days,
hours=self.hours,
minutes=self.minutes,
seconds=self.seconds,
microseconds=self.microseconds))
if self.weekday:
weekday, nth = self.weekday.weekday, self.weekday.n or 1
jumpdays = (abs(nth)-1)*7
if nth > 0:
jumpdays += (7-ret.weekday()+weekday)%7
else:
jumpdays += (ret.weekday()-weekday)%7
jumpdays *= -1
ret += datetime.timedelta(days=jumpdays)
return ret
def __rsub__(self, other):
return self.__neg__().__radd__(other)
def __add__(self, other):
if not isinstance(other, relativedelta):
raise TypeError, "unsupported type for add operation"
return relativedelta(years=other.years+self.years,
months=other.months+self.months,
days=other.days+self.days,
hours=other.hours+self.hours,
minutes=other.minutes+self.minutes,
seconds=other.seconds+self.seconds,
microseconds=other.microseconds+self.microseconds,
leapdays=other.leapdays or self.leapdays,
year=other.year or self.year,
month=other.month or self.month,
day=other.day or self.day,
weekday=other.weekday or self.weekday,
hour=other.hour or self.hour,
minute=other.minute or self.minute,
second=other.second or self.second,
                             microsecond=other.microsecond or self.microsecond)
def __sub__(self, other):
if not isinstance(other, relativedelta):
raise TypeError, "unsupported type for sub operation"
        return relativedelta(years=self.years-other.years,
                             months=self.months-other.months,
                             days=self.days-other.days,
                             hours=self.hours-other.hours,
                             minutes=self.minutes-other.minutes,
                             seconds=self.seconds-other.seconds,
                             microseconds=self.microseconds-other.microseconds,
                             leapdays=self.leapdays or other.leapdays,
                             year=self.year or other.year,
                             month=self.month or other.month,
                             day=self.day or other.day,
                             weekday=self.weekday or other.weekday,
                             hour=self.hour or other.hour,
                             minute=self.minute or other.minute,
                             second=self.second or other.second,
                             microsecond=self.microsecond or other.microsecond)
def __neg__(self):
return relativedelta(years=-self.years,
months=-self.months,
days=-self.days,
hours=-self.hours,
minutes=-self.minutes,
seconds=-self.seconds,
microseconds=-self.microseconds,
leapdays=self.leapdays,
year=self.year,
month=self.month,
day=self.day,
weekday=self.weekday,
hour=self.hour,
minute=self.minute,
second=self.second,
microsecond=self.microsecond)
def __nonzero__(self):
return not (not self.years and
not self.months and
not self.days and
not self.hours and
not self.minutes and
not self.seconds and
not self.microseconds and
not self.leapdays and
self.year is None and
self.month is None and
self.day is None and
self.weekday is None and
self.hour is None and
self.minute is None and
self.second is None and
self.microsecond is None)
def __mul__(self, other):
f = float(other)
return relativedelta(years=self.years*f,
months=self.months*f,
days=self.days*f,
hours=self.hours*f,
minutes=self.minutes*f,
seconds=self.seconds*f,
microseconds=self.microseconds*f,
leapdays=self.leapdays,
year=self.year,
month=self.month,
day=self.day,
weekday=self.weekday,
hour=self.hour,
minute=self.minute,
second=self.second,
microsecond=self.microsecond)
def __eq__(self, other):
if not isinstance(other, relativedelta):
return False
if self.weekday or other.weekday:
if not self.weekday or not other.weekday:
return False
if self.weekday.weekday != other.weekday.weekday:
return False
n1, n2 = self.weekday.n, other.weekday.n
if n1 != n2 and not ((not n1 or n1 == 1) and (not n2 or n2 == 1)):
return False
return (self.years == other.years and
self.months == other.months and
self.days == other.days and
self.hours == other.hours and
self.minutes == other.minutes and
                self.seconds == other.seconds and
                self.microseconds == other.microseconds and
self.leapdays == other.leapdays and
self.year == other.year and
self.month == other.month and
self.day == other.day and
self.hour == other.hour and
self.minute == other.minute and
self.second == other.second and
self.microsecond == other.microsecond)
def __ne__(self, other):
return not self.__eq__(other)
def __div__(self, other):
return self.__mul__(1/float(other))
def __repr__(self):
l = []
for attr in ["years", "months", "days", "leapdays",
"hours", "minutes", "seconds", "microseconds"]:
value = getattr(self, attr)
if value:
l.append("%s=%+d" % (attr, value))
for attr in ["year", "month", "day", "weekday",
"hour", "minute", "second", "microsecond"]:
value = getattr(self, attr)
if value is not None:
l.append("%s=%s" % (attr, `value`))
return "%s(%s)" % (self.__class__.__name__, ", ".join(l))
# vim:ts=4:sw=4:et

1097
libs/dateutil/rrule.py Normal file

File diff suppressed because it is too large

951
libs/dateutil/tz.py Normal file
View file

@ -0,0 +1,951 @@
"""
Copyright (c) 2003-2007 Gustavo Niemeyer <gustavo@niemeyer.net>
This module offers extensions to the standard python 2.3+
datetime module.
"""
__author__ = "Gustavo Niemeyer <gustavo@niemeyer.net>"
__license__ = "PSF License"
import datetime
import struct
import time
import sys
import os
relativedelta = None
parser = None
rrule = None
__all__ = ["tzutc", "tzoffset", "tzlocal", "tzfile", "tzrange",
"tzstr", "tzical", "tzwin", "tzwinlocal", "gettz"]
try:
from dateutil.tzwin import tzwin, tzwinlocal
except (ImportError, OSError):
tzwin, tzwinlocal = None, None
ZERO = datetime.timedelta(0)
EPOCHORDINAL = datetime.datetime.utcfromtimestamp(0).toordinal()
class tzutc(datetime.tzinfo):
def utcoffset(self, dt):
return ZERO
def dst(self, dt):
return ZERO
def tzname(self, dt):
return "UTC"
def __eq__(self, other):
return (isinstance(other, tzutc) or
(isinstance(other, tzoffset) and other._offset == ZERO))
def __ne__(self, other):
return not self.__eq__(other)
def __repr__(self):
return "%s()" % self.__class__.__name__
__reduce__ = object.__reduce__
class tzoffset(datetime.tzinfo):
def __init__(self, name, offset):
self._name = name
self._offset = datetime.timedelta(seconds=offset)
def utcoffset(self, dt):
return self._offset
def dst(self, dt):
return ZERO
def tzname(self, dt):
return self._name
def __eq__(self, other):
return (isinstance(other, tzoffset) and
self._offset == other._offset)
def __ne__(self, other):
return not self.__eq__(other)
def __repr__(self):
return "%s(%s, %s)" % (self.__class__.__name__,
`self._name`,
self._offset.days*86400+self._offset.seconds)
__reduce__ = object.__reduce__
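# Illustrative example: tzoffset("BRST", -10800) is a fixed UTC-3 zone, so a
# datetime built with tzinfo=tzoffset("BRST", -10800) reports
# utcoffset() == timedelta(hours=-3) and tzname() == "BRST".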
class tzlocal(datetime.tzinfo):
_std_offset = datetime.timedelta(seconds=-time.timezone)
if time.daylight:
_dst_offset = datetime.timedelta(seconds=-time.altzone)
else:
_dst_offset = _std_offset
def utcoffset(self, dt):
if self._isdst(dt):
return self._dst_offset
else:
return self._std_offset
def dst(self, dt):
if self._isdst(dt):
return self._dst_offset-self._std_offset
else:
return ZERO
def tzname(self, dt):
return time.tzname[self._isdst(dt)]
def _isdst(self, dt):
# We can't use mktime here. It is unstable when deciding if
# the hour near to a change is DST or not.
#
# timestamp = time.mktime((dt.year, dt.month, dt.day, dt.hour,
# dt.minute, dt.second, dt.weekday(), 0, -1))
# return time.localtime(timestamp).tm_isdst
#
# The code above yields the following result:
#
#>>> import tz, datetime
#>>> t = tz.tzlocal()
#>>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname()
#'BRDT'
#>>> datetime.datetime(2003,2,16,0,tzinfo=t).tzname()
#'BRST'
#>>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname()
#'BRST'
#>>> datetime.datetime(2003,2,15,22,tzinfo=t).tzname()
#'BRDT'
#>>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname()
#'BRDT'
#
# Here is a more stable implementation:
#
timestamp = ((dt.toordinal() - EPOCHORDINAL) * 86400
+ dt.hour * 3600
+ dt.minute * 60
+ dt.second)
return time.localtime(timestamp+time.timezone).tm_isdst
def __eq__(self, other):
if not isinstance(other, tzlocal):
return False
        return (self._std_offset == other._std_offset and
                self._dst_offset == other._dst_offset)
def __ne__(self, other):
return not self.__eq__(other)
def __repr__(self):
return "%s()" % self.__class__.__name__
__reduce__ = object.__reduce__
class _ttinfo(object):
__slots__ = ["offset", "delta", "isdst", "abbr", "isstd", "isgmt"]
def __init__(self):
for attr in self.__slots__:
setattr(self, attr, None)
def __repr__(self):
l = []
for attr in self.__slots__:
value = getattr(self, attr)
if value is not None:
l.append("%s=%s" % (attr, `value`))
return "%s(%s)" % (self.__class__.__name__, ", ".join(l))
def __eq__(self, other):
if not isinstance(other, _ttinfo):
return False
return (self.offset == other.offset and
self.delta == other.delta and
self.isdst == other.isdst and
self.abbr == other.abbr and
self.isstd == other.isstd and
self.isgmt == other.isgmt)
def __ne__(self, other):
return not self.__eq__(other)
def __getstate__(self):
state = {}
for name in self.__slots__:
state[name] = getattr(self, name, None)
return state
def __setstate__(self, state):
for name in self.__slots__:
if name in state:
setattr(self, name, state[name])
class tzfile(datetime.tzinfo):
# http://www.twinsun.com/tz/tz-link.htm
# ftp://elsie.nci.nih.gov/pub/tz*.tar.gz
def __init__(self, fileobj):
if isinstance(fileobj, basestring):
self._filename = fileobj
fileobj = open(fileobj)
elif hasattr(fileobj, "name"):
self._filename = fileobj.name
else:
self._filename = `fileobj`
# From tzfile(5):
#
# The time zone information files used by tzset(3)
# begin with the magic characters "TZif" to identify
# them as time zone information files, followed by
# sixteen bytes reserved for future use, followed by
# six four-byte values of type long, written in a
# ``standard'' byte order (the high-order byte
# of the value is written first).
if fileobj.read(4) != "TZif":
raise ValueError, "magic not found"
fileobj.read(16)
(
# The number of UTC/local indicators stored in the file.
ttisgmtcnt,
# The number of standard/wall indicators stored in the file.
ttisstdcnt,
# The number of leap seconds for which data is
# stored in the file.
leapcnt,
# The number of "transition times" for which data
# is stored in the file.
timecnt,
# The number of "local time types" for which data
# is stored in the file (must not be zero).
typecnt,
# The number of characters of "time zone
# abbreviation strings" stored in the file.
charcnt,
) = struct.unpack(">6l", fileobj.read(24))
# The above header is followed by tzh_timecnt four-byte
# values of type long, sorted in ascending order.
# These values are written in ``standard'' byte order.
# Each is used as a transition time (as returned by
# time(2)) at which the rules for computing local time
# change.
if timecnt:
self._trans_list = struct.unpack(">%dl" % timecnt,
fileobj.read(timecnt*4))
else:
self._trans_list = []
# Next come tzh_timecnt one-byte values of type unsigned
# char; each one tells which of the different types of
# ``local time'' types described in the file is associated
# with the same-indexed transition time. These values
# serve as indices into an array of ttinfo structures that
# appears next in the file.
if timecnt:
self._trans_idx = struct.unpack(">%dB" % timecnt,
fileobj.read(timecnt))
else:
self._trans_idx = []
# Each ttinfo structure is written as a four-byte value
# for tt_gmtoff of type long, in a standard byte
# order, followed by a one-byte value for tt_isdst
# and a one-byte value for tt_abbrind. In each
# structure, tt_gmtoff gives the number of
# seconds to be added to UTC, tt_isdst tells whether
# tm_isdst should be set by localtime(3), and
# tt_abbrind serves as an index into the array of
# time zone abbreviation characters that follow the
# ttinfo structure(s) in the file.
ttinfo = []
for i in range(typecnt):
ttinfo.append(struct.unpack(">lbb", fileobj.read(6)))
abbr = fileobj.read(charcnt)
# Then there are tzh_leapcnt pairs of four-byte
# values, written in standard byte order; the
# first value of each pair gives the time (as
# returned by time(2)) at which a leap second
# occurs; the second gives the total number of
# leap seconds to be applied after the given time.
# The pairs of values are sorted in ascending order
# by time.
# Not used, for now
if leapcnt:
leap = struct.unpack(">%dl" % (leapcnt*2),
fileobj.read(leapcnt*8))
# Then there are tzh_ttisstdcnt standard/wall
# indicators, each stored as a one-byte value;
# they tell whether the transition times associated
# with local time types were specified as standard
# time or wall clock time, and are used when
# a time zone file is used in handling POSIX-style
# time zone environment variables.
if ttisstdcnt:
isstd = struct.unpack(">%db" % ttisstdcnt,
fileobj.read(ttisstdcnt))
# Finally, there are tzh_ttisgmtcnt UTC/local
# indicators, each stored as a one-byte value;
# they tell whether the transition times associated
# with local time types were specified as UTC or
# local time, and are used when a time zone file
# is used in handling POSIX-style time zone envi-
# ronment variables.
if ttisgmtcnt:
isgmt = struct.unpack(">%db" % ttisgmtcnt,
fileobj.read(ttisgmtcnt))
# ** Everything has been read **
# Build ttinfo list
self._ttinfo_list = []
for i in range(typecnt):
gmtoff, isdst, abbrind = ttinfo[i]
            # Round to full minutes if that's not the case. Python's
# datetime doesn't accept sub-minute timezones. Check
# http://python.org/sf/1447945 for some information.
gmtoff = (gmtoff+30)//60*60
tti = _ttinfo()
tti.offset = gmtoff
tti.delta = datetime.timedelta(seconds=gmtoff)
tti.isdst = isdst
tti.abbr = abbr[abbrind:abbr.find('\x00', abbrind)]
tti.isstd = (ttisstdcnt > i and isstd[i] != 0)
tti.isgmt = (ttisgmtcnt > i and isgmt[i] != 0)
self._ttinfo_list.append(tti)
# Replace ttinfo indexes for ttinfo objects.
trans_idx = []
for idx in self._trans_idx:
trans_idx.append(self._ttinfo_list[idx])
self._trans_idx = tuple(trans_idx)
# Set standard, dst, and before ttinfos. before will be
# used when a given time is before any transitions,
# and will be set to the first non-dst ttinfo, or to
# the first dst, if all of them are dst.
self._ttinfo_std = None
self._ttinfo_dst = None
self._ttinfo_before = None
if self._ttinfo_list:
if not self._trans_list:
self._ttinfo_std = self._ttinfo_first = self._ttinfo_list[0]
else:
for i in range(timecnt-1,-1,-1):
tti = self._trans_idx[i]
if not self._ttinfo_std and not tti.isdst:
self._ttinfo_std = tti
elif not self._ttinfo_dst and tti.isdst:
self._ttinfo_dst = tti
if self._ttinfo_std and self._ttinfo_dst:
break
else:
if self._ttinfo_dst and not self._ttinfo_std:
self._ttinfo_std = self._ttinfo_dst
for tti in self._ttinfo_list:
if not tti.isdst:
self._ttinfo_before = tti
break
else:
self._ttinfo_before = self._ttinfo_list[0]
# Now fix transition times to become relative to wall time.
#
# I'm not sure about this. In my tests, the tz source file
        # is set up for wall time, and in the binary file isstd and
# isgmt are off, so it should be in wall time. OTOH, it's
# always in gmt time. Let me know if you have comments
# about this.
laststdoffset = 0
self._trans_list = list(self._trans_list)
for i in range(len(self._trans_list)):
tti = self._trans_idx[i]
if not tti.isdst:
# This is std time.
self._trans_list[i] += tti.offset
laststdoffset = tti.offset
else:
# This is dst time. Convert to std.
self._trans_list[i] += laststdoffset
self._trans_list = tuple(self._trans_list)
def _find_ttinfo(self, dt, laststd=0):
timestamp = ((dt.toordinal() - EPOCHORDINAL) * 86400
+ dt.hour * 3600
+ dt.minute * 60
+ dt.second)
idx = 0
for trans in self._trans_list:
if timestamp < trans:
break
idx += 1
else:
return self._ttinfo_std
if idx == 0:
return self._ttinfo_before
if laststd:
while idx > 0:
tti = self._trans_idx[idx-1]
if not tti.isdst:
return tti
idx -= 1
else:
return self._ttinfo_std
else:
return self._trans_idx[idx-1]
def utcoffset(self, dt):
if not self._ttinfo_std:
return ZERO
return self._find_ttinfo(dt).delta
def dst(self, dt):
if not self._ttinfo_dst:
return ZERO
tti = self._find_ttinfo(dt)
if not tti.isdst:
return ZERO
# The documentation says that utcoffset()-dst() must
# be constant for every dt.
return tti.delta-self._find_ttinfo(dt, laststd=1).delta
# An alternative for that would be:
#
# return self._ttinfo_dst.offset-self._ttinfo_std.offset
#
# However, this class stores historical changes in the
        # dst offset, so I believe that this wouldn't be the right
# way to implement this.
def tzname(self, dt):
if not self._ttinfo_std:
return None
return self._find_ttinfo(dt).abbr
def __eq__(self, other):
if not isinstance(other, tzfile):
return False
return (self._trans_list == other._trans_list and
self._trans_idx == other._trans_idx and
self._ttinfo_list == other._ttinfo_list)
def __ne__(self, other):
return not self.__eq__(other)
def __repr__(self):
return "%s(%s)" % (self.__class__.__name__, `self._filename`)
def __reduce__(self):
if not os.path.isfile(self._filename):
raise ValueError, "Unpickable %s class" % self.__class__.__name__
return (self.__class__, (self._filename,))
class tzrange(datetime.tzinfo):
def __init__(self, stdabbr, stdoffset=None,
dstabbr=None, dstoffset=None,
start=None, end=None):
global relativedelta
if not relativedelta:
from dateutil import relativedelta
self._std_abbr = stdabbr
self._dst_abbr = dstabbr
if stdoffset is not None:
self._std_offset = datetime.timedelta(seconds=stdoffset)
else:
self._std_offset = ZERO
if dstoffset is not None:
self._dst_offset = datetime.timedelta(seconds=dstoffset)
elif dstabbr and stdoffset is not None:
self._dst_offset = self._std_offset+datetime.timedelta(hours=+1)
else:
self._dst_offset = ZERO
if dstabbr and start is None:
self._start_delta = relativedelta.relativedelta(
hours=+2, month=4, day=1, weekday=relativedelta.SU(+1))
else:
self._start_delta = start
if dstabbr and end is None:
self._end_delta = relativedelta.relativedelta(
hours=+1, month=10, day=31, weekday=relativedelta.SU(-1))
else:
self._end_delta = end
def utcoffset(self, dt):
if self._isdst(dt):
return self._dst_offset
else:
return self._std_offset
def dst(self, dt):
if self._isdst(dt):
return self._dst_offset-self._std_offset
else:
return ZERO
def tzname(self, dt):
if self._isdst(dt):
return self._dst_abbr
else:
return self._std_abbr
def _isdst(self, dt):
if not self._start_delta:
return False
year = datetime.datetime(dt.year,1,1)
start = year+self._start_delta
end = year+self._end_delta
dt = dt.replace(tzinfo=None)
if start < end:
return dt >= start and dt < end
else:
return dt >= start or dt < end
def __eq__(self, other):
if not isinstance(other, tzrange):
return False
return (self._std_abbr == other._std_abbr and
self._dst_abbr == other._dst_abbr and
self._std_offset == other._std_offset and
self._dst_offset == other._dst_offset and
self._start_delta == other._start_delta and
self._end_delta == other._end_delta)
def __ne__(self, other):
return not self.__eq__(other)
def __repr__(self):
return "%s(...)" % self.__class__.__name__
__reduce__ = object.__reduce__
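# Illustrative example: tzrange("EST", -18000, "EDT") models US Eastern time
# with the default (pre-2007) transition rules above: DST starts on the first
# Sunday of April at 2AM and ends on the last Sunday of October.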
class tzstr(tzrange):
def __init__(self, s):
global parser
if not parser:
from dateutil import parser
self._s = s
res = parser._parsetz(s)
if res is None:
raise ValueError, "unknown string format"
# Here we break the compatibility with the TZ variable handling.
# GMT-3 actually *means* the timezone -3.
if res.stdabbr in ("GMT", "UTC"):
res.stdoffset *= -1
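            # e.g. tzstr("GMT-3") therefore means UTC-3, whereas a strict
            # POSIX TZ reading of "GMT-3" would yield UTC+3.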
# We must initialize it first, since _delta() needs
# _std_offset and _dst_offset set. Use False in start/end
# to avoid building it two times.
tzrange.__init__(self, res.stdabbr, res.stdoffset,
res.dstabbr, res.dstoffset,
start=False, end=False)
if not res.dstabbr:
self._start_delta = None
self._end_delta = None
else:
self._start_delta = self._delta(res.start)
if self._start_delta:
self._end_delta = self._delta(res.end, isend=1)
def _delta(self, x, isend=0):
kwargs = {}
if x.month is not None:
kwargs["month"] = x.month
if x.weekday is not None:
kwargs["weekday"] = relativedelta.weekday(x.weekday, x.week)
if x.week > 0:
kwargs["day"] = 1
else:
kwargs["day"] = 31
elif x.day:
kwargs["day"] = x.day
elif x.yday is not None:
kwargs["yearday"] = x.yday
elif x.jyday is not None:
kwargs["nlyearday"] = x.jyday
if not kwargs:
            # Default is to start on first Sunday of April, and end
            # on last Sunday of October.
if not isend:
kwargs["month"] = 4
kwargs["day"] = 1
kwargs["weekday"] = relativedelta.SU(+1)
else:
kwargs["month"] = 10
kwargs["day"] = 31
kwargs["weekday"] = relativedelta.SU(-1)
if x.time is not None:
kwargs["seconds"] = x.time
else:
# Default is 2AM.
kwargs["seconds"] = 7200
if isend:
# Convert to standard time, to follow the documented way
# of working with the extra hour. See the documentation
# of the tzinfo class.
delta = self._dst_offset-self._std_offset
kwargs["seconds"] -= delta.seconds+delta.days*86400
return relativedelta.relativedelta(**kwargs)
def __repr__(self):
return "%s(%s)" % (self.__class__.__name__, `self._s`)
class _tzicalvtzcomp:
def __init__(self, tzoffsetfrom, tzoffsetto, isdst,
tzname=None, rrule=None):
self.tzoffsetfrom = datetime.timedelta(seconds=tzoffsetfrom)
self.tzoffsetto = datetime.timedelta(seconds=tzoffsetto)
self.tzoffsetdiff = self.tzoffsetto-self.tzoffsetfrom
self.isdst = isdst
self.tzname = tzname
self.rrule = rrule
class _tzicalvtz(datetime.tzinfo):
def __init__(self, tzid, comps=[]):
self._tzid = tzid
self._comps = comps
self._cachedate = []
self._cachecomp = []
def _find_comp(self, dt):
if len(self._comps) == 1:
return self._comps[0]
dt = dt.replace(tzinfo=None)
try:
return self._cachecomp[self._cachedate.index(dt)]
except ValueError:
pass
lastcomp = None
lastcompdt = None
for comp in self._comps:
if not comp.isdst:
# Handle the extra hour in DST -> STD
compdt = comp.rrule.before(dt-comp.tzoffsetdiff, inc=True)
else:
compdt = comp.rrule.before(dt, inc=True)
if compdt and (not lastcompdt or lastcompdt < compdt):
lastcompdt = compdt
lastcomp = comp
if not lastcomp:
# RFC says nothing about what to do when a given
# time is before the first onset date. We'll look for the
# first standard component, or the first component, if
# none is found.
for comp in self._comps:
if not comp.isdst:
lastcomp = comp
break
else:
                lastcomp = self._comps[0]
self._cachedate.insert(0, dt)
self._cachecomp.insert(0, lastcomp)
if len(self._cachedate) > 10:
self._cachedate.pop()
self._cachecomp.pop()
return lastcomp
def utcoffset(self, dt):
return self._find_comp(dt).tzoffsetto
def dst(self, dt):
comp = self._find_comp(dt)
if comp.isdst:
return comp.tzoffsetdiff
else:
return ZERO
def tzname(self, dt):
return self._find_comp(dt).tzname
def __repr__(self):
return "<tzicalvtz %s>" % `self._tzid`
__reduce__ = object.__reduce__
class tzical:
def __init__(self, fileobj):
global rrule
if not rrule:
from dateutil import rrule
if isinstance(fileobj, basestring):
self._s = fileobj
fileobj = open(fileobj)
elif hasattr(fileobj, "name"):
self._s = fileobj.name
else:
self._s = `fileobj`
self._vtz = {}
self._parse_rfc(fileobj.read())
def keys(self):
return self._vtz.keys()
def get(self, tzid=None):
if tzid is None:
keys = self._vtz.keys()
if len(keys) == 0:
raise ValueError, "no timezones defined"
elif len(keys) > 1:
raise ValueError, "more than one timezone available"
tzid = keys[0]
return self._vtz.get(tzid)
def _parse_offset(self, s):
s = s.strip()
if not s:
raise ValueError, "empty offset"
if s[0] in ('+', '-'):
signal = (-1,+1)[s[0]=='+']
s = s[1:]
else:
signal = +1
if len(s) == 4:
return (int(s[:2])*3600+int(s[2:])*60)*signal
elif len(s) == 6:
return (int(s[:2])*3600+int(s[2:4])*60+int(s[4:]))*signal
else:
raise ValueError, "invalid offset: "+s
def _parse_rfc(self, s):
lines = s.splitlines()
if not lines:
raise ValueError, "empty string"
# Unfold
i = 0
while i < len(lines):
line = lines[i].rstrip()
if not line:
del lines[i]
elif i > 0 and line[0] == " ":
lines[i-1] += line[1:]
del lines[i]
else:
i += 1
tzid = None
comps = []
invtz = False
comptype = None
for line in lines:
if not line:
continue
name, value = line.split(':', 1)
parms = name.split(';')
if not parms:
raise ValueError, "empty property name"
name = parms[0].upper()
parms = parms[1:]
if invtz:
if name == "BEGIN":
if value in ("STANDARD", "DAYLIGHT"):
# Process component
pass
else:
raise ValueError, "unknown component: "+value
comptype = value
founddtstart = False
tzoffsetfrom = None
tzoffsetto = None
rrulelines = []
tzname = None
elif name == "END":
if value == "VTIMEZONE":
if comptype:
raise ValueError, \
"component not closed: "+comptype
if not tzid:
raise ValueError, \
"mandatory TZID not found"
if not comps:
raise ValueError, \
"at least one component is needed"
# Process vtimezone
self._vtz[tzid] = _tzicalvtz(tzid, comps)
invtz = False
elif value == comptype:
if not founddtstart:
raise ValueError, \
"mandatory DTSTART not found"
if tzoffsetfrom is None:
raise ValueError, \
"mandatory TZOFFSETFROM not found"
if tzoffsetto is None:
raise ValueError, \
"mandatory TZOFFSETFROM not found"
# Process component
rr = None
if rrulelines:
rr = rrule.rrulestr("\n".join(rrulelines),
compatible=True,
ignoretz=True,
cache=True)
comp = _tzicalvtzcomp(tzoffsetfrom, tzoffsetto,
(comptype == "DAYLIGHT"),
tzname, rr)
comps.append(comp)
comptype = None
else:
raise ValueError, \
"invalid component end: "+value
elif comptype:
if name == "DTSTART":
rrulelines.append(line)
founddtstart = True
elif name in ("RRULE", "RDATE", "EXRULE", "EXDATE"):
rrulelines.append(line)
elif name == "TZOFFSETFROM":
if parms:
raise ValueError, \
"unsupported %s parm: %s "%(name, parms[0])
tzoffsetfrom = self._parse_offset(value)
elif name == "TZOFFSETTO":
if parms:
raise ValueError, \
"unsupported TZOFFSETTO parm: "+parms[0]
tzoffsetto = self._parse_offset(value)
elif name == "TZNAME":
if parms:
raise ValueError, \
"unsupported TZNAME parm: "+parms[0]
tzname = value
elif name == "COMMENT":
pass
else:
raise ValueError, "unsupported property: "+name
else:
if name == "TZID":
if parms:
raise ValueError, \
"unsupported TZID parm: "+parms[0]
tzid = value
elif name in ("TZURL", "LAST-MODIFIED", "COMMENT"):
pass
else:
raise ValueError, "unsupported property: "+name
elif name == "BEGIN" and value == "VTIMEZONE":
tzid = None
comps = []
invtz = True
def __repr__(self):
return "%s(%s)" % (self.__class__.__name__, `self._s`)
if sys.platform != "win32":
TZFILES = ["/etc/localtime", "localtime"]
TZPATHS = ["/usr/share/zoneinfo", "/usr/lib/zoneinfo", "/etc/zoneinfo"]
else:
TZFILES = []
TZPATHS = []
def gettz(name=None):
tz = None
if not name:
try:
name = os.environ["TZ"]
except KeyError:
pass
if name is None or name == ":":
for filepath in TZFILES:
if not os.path.isabs(filepath):
filename = filepath
for path in TZPATHS:
filepath = os.path.join(path, filename)
if os.path.isfile(filepath):
break
else:
continue
if os.path.isfile(filepath):
try:
tz = tzfile(filepath)
break
except (IOError, OSError, ValueError):
pass
else:
tz = tzlocal()
else:
if name.startswith(":"):
            name = name[1:]
if os.path.isabs(name):
if os.path.isfile(name):
tz = tzfile(name)
else:
tz = None
else:
for path in TZPATHS:
filepath = os.path.join(path, name)
if not os.path.isfile(filepath):
filepath = filepath.replace(' ','_')
if not os.path.isfile(filepath):
continue
try:
tz = tzfile(filepath)
break
except (IOError, OSError, ValueError):
pass
else:
tz = None
if tzwin:
try:
tz = tzwin(name)
except OSError:
pass
if not tz:
from dateutil.zoneinfo import gettz
tz = gettz(name)
if not tz:
for c in name:
# name must have at least one offset to be a tzstr
if c in "0123456789":
try:
tz = tzstr(name)
except ValueError:
pass
break
else:
if name in ("GMT", "UTC"):
tz = tzutc()
elif name in time.tzname:
tz = tzlocal()
return tz
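# Illustrative example: gettz("Europe/London") loads the system zoneinfo file
# when one is available, falling back to the data bundled with
# dateutil.zoneinfo; gettz("UTC") returns tzutc().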
# vim:ts=4:sw=4:et

180
libs/dateutil/tzwin.py Normal file
View file

@ -0,0 +1,180 @@
# This code was originally contributed by Jeffrey Harris.
import datetime
import struct
import _winreg
__author__ = "Jeffrey Harris & Gustavo Niemeyer <gustavo@niemeyer.net>"
__all__ = ["tzwin", "tzwinlocal"]
ONEWEEK = datetime.timedelta(7)
TZKEYNAMENT = r"SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time Zones"
TZKEYNAME9X = r"SOFTWARE\Microsoft\Windows\CurrentVersion\Time Zones"
TZLOCALKEYNAME = r"SYSTEM\CurrentControlSet\Control\TimeZoneInformation"
def _settzkeyname():
global TZKEYNAME
handle = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE)
try:
_winreg.OpenKey(handle, TZKEYNAMENT).Close()
TZKEYNAME = TZKEYNAMENT
except WindowsError:
TZKEYNAME = TZKEYNAME9X
handle.Close()
_settzkeyname()
class tzwinbase(datetime.tzinfo):
"""tzinfo class based on win32's timezones available in the registry."""
def utcoffset(self, dt):
if self._isdst(dt):
return datetime.timedelta(minutes=self._dstoffset)
else:
return datetime.timedelta(minutes=self._stdoffset)
def dst(self, dt):
if self._isdst(dt):
minutes = self._dstoffset - self._stdoffset
return datetime.timedelta(minutes=minutes)
else:
return datetime.timedelta(0)
def tzname(self, dt):
if self._isdst(dt):
return self._dstname
else:
return self._stdname
def list():
"""Return a list of all time zones known to the system."""
handle = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE)
tzkey = _winreg.OpenKey(handle, TZKEYNAME)
result = [_winreg.EnumKey(tzkey, i)
for i in range(_winreg.QueryInfoKey(tzkey)[0])]
tzkey.Close()
handle.Close()
return result
list = staticmethod(list)
def display(self):
return self._display
def _isdst(self, dt):
dston = picknthweekday(dt.year, self._dstmonth, self._dstdayofweek,
self._dsthour, self._dstminute,
self._dstweeknumber)
dstoff = picknthweekday(dt.year, self._stdmonth, self._stddayofweek,
self._stdhour, self._stdminute,
self._stdweeknumber)
if dston < dstoff:
return dston <= dt.replace(tzinfo=None) < dstoff
else:
return not dstoff <= dt.replace(tzinfo=None) < dston
class tzwin(tzwinbase):
def __init__(self, name):
self._name = name
handle = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE)
tzkey = _winreg.OpenKey(handle, "%s\%s" % (TZKEYNAME, name))
keydict = valuestodict(tzkey)
tzkey.Close()
handle.Close()
self._stdname = keydict["Std"].encode("iso-8859-1")
self._dstname = keydict["Dlt"].encode("iso-8859-1")
self._display = keydict["Display"]
        # See http://www.jsiinc.com/SUBA/tip0300/rh0398.htm
tup = struct.unpack("=3l16h", keydict["TZI"])
self._stdoffset = -tup[0]-tup[1] # Bias + StandardBias * -1
self._dstoffset = self._stdoffset-tup[2] # + DaylightBias * -1
(self._stdmonth,
self._stddayofweek, # Sunday = 0
self._stdweeknumber, # Last = 5
self._stdhour,
self._stdminute) = tup[4:9]
(self._dstmonth,
self._dstdayofweek, # Sunday = 0
self._dstweeknumber, # Last = 5
self._dsthour,
self._dstminute) = tup[12:17]
def __repr__(self):
return "tzwin(%s)" % repr(self._name)
def __reduce__(self):
return (self.__class__, (self._name,))
class tzwinlocal(tzwinbase):
def __init__(self):
handle = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE)
tzlocalkey = _winreg.OpenKey(handle, TZLOCALKEYNAME)
keydict = valuestodict(tzlocalkey)
tzlocalkey.Close()
self._stdname = keydict["StandardName"].encode("iso-8859-1")
self._dstname = keydict["DaylightName"].encode("iso-8859-1")
try:
tzkey = _winreg.OpenKey(handle, "%s\%s"%(TZKEYNAME, self._stdname))
_keydict = valuestodict(tzkey)
self._display = _keydict["Display"]
tzkey.Close()
except OSError:
self._display = None
handle.Close()
self._stdoffset = -keydict["Bias"]-keydict["StandardBias"]
self._dstoffset = self._stdoffset-keydict["DaylightBias"]
        # See http://www.jsiinc.com/SUBA/tip0300/rh0398.htm
tup = struct.unpack("=8h", keydict["StandardStart"])
(self._stdmonth,
self._stddayofweek, # Sunday = 0
self._stdweeknumber, # Last = 5
self._stdhour,
self._stdminute) = tup[1:6]
tup = struct.unpack("=8h", keydict["DaylightStart"])
(self._dstmonth,
self._dstdayofweek, # Sunday = 0
self._dstweeknumber, # Last = 5
self._dsthour,
self._dstminute) = tup[1:6]
def __reduce__(self):
return (self.__class__, ())
def picknthweekday(year, month, dayofweek, hour, minute, whichweek):
"""dayofweek == 0 means Sunday, whichweek 5 means last instance"""
first = datetime.datetime(year, month, 1, hour, minute)
weekdayone = first.replace(day=((dayofweek-first.isoweekday())%7+1))
for n in xrange(whichweek):
        dt = weekdayone+(whichweek-n-1)*ONEWEEK
if dt.month == month:
return dt
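# Illustrative example (assuming the off-by-one correction above): a rule
# stored as month=3, dayofweek=0 (Sunday), whichweek=2 gives
# picknthweekday(2016, 3, 0, 2, 0, 2) == datetime.datetime(2016, 3, 13, 2, 0),
# i.e. the second Sunday of March 2016.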
def valuestodict(key):
"""Convert a registry key's values to a dictionary."""
dict = {}
size = _winreg.QueryInfoKey(key)[1]
for i in range(size):
data = _winreg.EnumValue(key, i)
dict[data[0]] = data[1]
return dict

87
libs/dateutil/zoneinfo/__init__.py Normal file
View file

@ -0,0 +1,87 @@
"""
Copyright (c) 2003-2005 Gustavo Niemeyer <gustavo@niemeyer.net>
This module offers extensions to the standard python 2.3+
datetime module.
"""
from dateutil.tz import tzfile
from tarfile import TarFile
import os
__author__ = "Gustavo Niemeyer <gustavo@niemeyer.net>"
__license__ = "PSF License"
__all__ = ["setcachesize", "gettz", "rebuild"]
CACHE = []
CACHESIZE = 10
class tzfile(tzfile):
def __reduce__(self):
return (gettz, (self._filename,))
def getzoneinfofile():
filenames = os.listdir(os.path.join(os.path.dirname(__file__)))
filenames.sort()
filenames.reverse()
for entry in filenames:
if entry.startswith("zoneinfo") and ".tar." in entry:
return os.path.join(os.path.dirname(__file__), entry)
return None
ZONEINFOFILE = getzoneinfofile()
del getzoneinfofile
def setcachesize(size):
global CACHESIZE, CACHE
CACHESIZE = size
del CACHE[size:]
def gettz(name):
tzinfo = None
if ZONEINFOFILE:
for cachedname, tzinfo in CACHE:
if cachedname == name:
break
else:
tf = TarFile.open(ZONEINFOFILE)
try:
zonefile = tf.extractfile(name)
except KeyError:
tzinfo = None
else:
tzinfo = tzfile(zonefile)
tf.close()
CACHE.insert(0, (name, tzinfo))
del CACHE[CACHESIZE:]
return tzinfo
def rebuild(filename, tag=None, format="gz"):
import tempfile, shutil
tmpdir = tempfile.mkdtemp()
zonedir = os.path.join(tmpdir, "zoneinfo")
moduledir = os.path.dirname(__file__)
if tag: tag = "-"+tag
targetname = "zoneinfo%s.tar.%s" % (tag, format)
try:
tf = TarFile.open(filename)
for name in tf.getnames():
if not (name.endswith(".sh") or
name.endswith(".tab") or
name == "leapseconds"):
tf.extract(name, tmpdir)
filepath = os.path.join(tmpdir, name)
os.system("zic -d %s %s" % (zonedir, filepath))
tf.close()
target = os.path.join(moduledir, targetname)
for entry in os.listdir(moduledir):
if entry.startswith("zoneinfo") and ".tar." in entry:
os.unlink(os.path.join(moduledir, entry))
tf = TarFile.open(target, "w:%s" % format)
for entry in os.listdir(zonedir):
entrypath = os.path.join(zonedir, entry)
tf.add(entrypath, entry)
tf.close()
finally:
shutil.rmtree(tmpdir)
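# Usage sketch (illustrative; the archive name is hypothetical and "zic" must
# be on the PATH):
#   rebuild("tzdata2010g.tar.gz", tag="2010g")
# compiles the IANA source archive with zic and replaces the bundled
# zoneinfo-2010g.tar.gz next to this module.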

Binary file not shown.

libs/guessit/__init__.py
View file

@ -1,359 +1,8 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
"""
Extracts as much information as possible from a video file.
"""
from .api import guessit, GuessItApi
from __future__ import absolute_import, division, print_function, unicode_literals
import pkg_resources
from .__version__ import __version__
__all__ = ['Guess', 'Language',
'guess_file_info', 'guess_video_info',
'guess_movie_info', 'guess_episode_info',
'default_options']
# Do python3 detection before importing any other module, to be sure that
# it will then always be available
# with code from http://lucumr.pocoo.org/2011/1/22/forwards-compatible-python/
import sys
if sys.version_info[0] >= 3: # pragma: no cover
PY2, PY3 = False, True
unicode_text_type = str
native_text_type = str
base_text_type = str
def u(x):
return str(x)
def s(x):
return x
class UnicodeMixin(object):
__str__ = lambda x: x.__unicode__()
import binascii
def to_hex(x):
return binascii.hexlify(x).decode('utf-8')
else: # pragma: no cover
PY2, PY3 = True, False
__all__ = [str(s) for s in __all__] # fix imports for python2
unicode_text_type = unicode
native_text_type = str
base_text_type = basestring
def u(x):
if isinstance(x, str):
return x.decode('utf-8')
if isinstance(x, list):
return [u(s) for s in x]
return unicode(x)
def s(x):
if isinstance(x, unicode):
return x.encode('utf-8')
if isinstance(x, list):
return [s(y) for y in x]
if isinstance(x, tuple):
return tuple(s(y) for y in x)
if isinstance(x, dict):
return dict((s(key), s(value)) for key, value in x.items())
return x
class UnicodeMixin(object):
__str__ = lambda x: unicode(x).encode('utf-8')
def to_hex(x):
return x.encode('hex')
range = xrange
from guessit.guess import Guess, smart_merge
from guessit.language import Language
from guessit.matcher import IterativeMatcher
from guessit.textutils import clean_default, is_camel, from_camel
import babelfish
import os.path
import logging
from copy import deepcopy
log = logging.getLogger(__name__)
class NullHandler(logging.Handler):
def emit(self, record):
pass
# let's be a nicely behaving library
h = NullHandler()
log.addHandler(h)
def _guess_filename(filename, options=None, **kwargs):
mtree = _build_filename_mtree(filename, options=options, **kwargs)
if options.get('split_camel'):
_add_camel_properties(mtree, options=options)
return mtree.matched()
def _build_filename_mtree(filename, options=None, **kwargs):
mtree = IterativeMatcher(filename, options=options, **kwargs)
second_pass_options = mtree.second_pass_options
if second_pass_options:
log.debug("Running 2nd pass")
merged_options = dict(options)
merged_options.update(second_pass_options)
mtree = IterativeMatcher(filename, options=merged_options, **kwargs)
return mtree
def _add_camel_properties(mtree, options=None, **kwargs):
prop = 'title' if mtree.matched().get('type') != 'episode' else 'series'
value = mtree.matched().get(prop)
_guess_camel_string(mtree, value, options=options, skip_title=False, **kwargs)
for leaf in mtree.match_tree.unidentified_leaves():
value = leaf.value
_guess_camel_string(mtree, value, options=options, skip_title=True, **kwargs)
def _guess_camel_string(mtree, string, options=None, skip_title=False, **kwargs):
if string and is_camel(string):
log.debug('"%s" is camel cased. Try to detect more properties.' % (string,))
uncameled_value = from_camel(string)
merged_options = dict(options)
if 'type' in mtree.match_tree.info:
current_type = mtree.match_tree.info.get('type')
if current_type and current_type != 'unknown':
merged_options['type'] = current_type
camel_tree = _build_filename_mtree(uncameled_value, options=merged_options, name_only=True, skip_title=skip_title, **kwargs)
if len(camel_tree.matched()) > 0:
mtree.matched().update(camel_tree.matched())
return True
return False
def guess_video_metadata(filename):
"""Gets the video metadata properties out of a given file. The file needs to
exist on the filesystem to be able to be analyzed. An empty guess is
returned otherwise.
You need to have the Enzyme python package installed for this to work."""
result = Guess()
def found(prop, value):
result[prop] = value
log.debug('Found with enzyme %s: %s' % (prop, value))
# first get the size of the file, in bytes
try:
size = os.stat(filename).st_size
found('fileSize', size)
except Exception as e:
log.error('Cannot get video file size: %s' % e)
# file probably does not exist, we might as well return now
return result
# then get additional metadata from the file using enzyme, if available
try:
import enzyme
with open(filename) as f:
mkv = enzyme.MKV(f)
found('duration', mkv.info.duration.total_seconds())
if mkv.video_tracks:
video_track = mkv.video_tracks[0]
# resolution
if video_track.height in (480, 720, 1080):
if video_track.interlaced:
found('screenSize', '%di' % video_track.height)
else:
found('screenSize', '%dp' % video_track.height)
else:
# TODO: do we want this?
#found('screenSize', '%dx%d' % (video_track.width, video_track.height))
pass
# video codec
if video_track.codec_id == 'V_MPEG4/ISO/AVC':
found('videoCodec', 'h264')
elif video_track.codec_id == 'V_MPEG4/ISO/SP':
found('videoCodec', 'DivX')
elif video_track.codec_id == 'V_MPEG4/ISO/ASP':
found('videoCodec', 'XviD')
else:
log.warning('MKV has no video track')
if mkv.audio_tracks:
audio_track = mkv.audio_tracks[0]
# audio codec
if audio_track.codec_id == 'A_AC3':
found('audioCodec', 'AC3')
elif audio_track.codec_id == 'A_DTS':
found('audioCodec', 'DTS')
elif audio_track.codec_id == 'A_AAC':
found('audioCodec', 'AAC')
else:
log.warning('MKV has no audio track')
if mkv.subtitle_tracks:
embedded_subtitle_languages = set()
for st in mkv.subtitle_tracks:
try:
if st.language:
lang = babelfish.Language.fromalpha3b(st.language)
elif st.name:
lang = babelfish.Language.fromname(st.name)
else:
lang = babelfish.Language('und')
except babelfish.Error:
lang = babelfish.Language('und')
embedded_subtitle_languages.add(lang)
found('subtitleLanguage', embedded_subtitle_languages)
else:
log.debug('MKV has no subtitle track')
return result
except ImportError:
log.error('Cannot get video file metadata, missing dependency: enzyme')
log.error('Please install it from PyPI, by doing eg: pip install enzyme')
return result
except IOError as e:
log.error('Could not open file: %s' % filename)
log.error('Make sure it exists and is available for reading on the filesystem')
log.error('Error: %s' % e)
return result
except enzyme.Error as e:
log.error('Cannot guess video file metadata')
log.error('enzyme.Error while reading file: %s' % filename)
log.error('Error: %s' % e)
return result
default_options = {}
def guess_file_info(filename, info=None, options=None, **kwargs):
"""info can contain the names of the various plugins, such as 'filename' to
detect filename info, or 'hash_md5' to get the md5 hash of the file.
>>> testfile = os.path.join(os.path.dirname(__file__), 'test/dummy.srt')
>>> g = guess_file_info(testfile, info = ['hash_md5', 'hash_sha1'])
>>> g['hash_md5'], g['hash_sha1']
('64de6b5893cac24456c46a935ef9c359', 'a703fc0fa4518080505809bf562c6fc6f7b3c98c')
"""
info = info or 'filename'
options = options or {}
if default_options:
merged_options = deepcopy(default_options)
merged_options.update(options)
options = merged_options
result = []
hashers = []
# Force unicode as soon as possible
filename = u(filename)
if isinstance(info, base_text_type):
info = [info]
for infotype in info:
if infotype == 'filename':
result.append(_guess_filename(filename, options, **kwargs))
elif infotype == 'hash_mpc':
from guessit.hash_mpc import hash_file
try:
result.append(Guess({infotype: hash_file(filename)},
confidence=1.0))
except Exception as e:
log.warning('Could not compute MPC-style hash because: %s' % e)
elif infotype == 'hash_ed2k':
from guessit.hash_ed2k import hash_file
try:
result.append(Guess({infotype: hash_file(filename)},
confidence=1.0))
except Exception as e:
log.warning('Could not compute ed2k hash because: %s' % e)
elif infotype.startswith('hash_'):
import hashlib
hashname = infotype[5:]
try:
hasher = getattr(hashlib, hashname)()
hashers.append((infotype, hasher))
except AttributeError:
log.warning('Could not compute %s hash because it is not available from python\'s hashlib module' % hashname)
elif infotype == 'video':
g = guess_video_metadata(filename)
if g:
result.append(g)
else:
log.warning('Invalid infotype: %s' % infotype)
    # do all the hashes now, but in a single pass
if hashers:
try:
blocksize = 8192
hasherobjs = dict(hashers).values()
with open(filename, 'rb') as f:
chunk = f.read(blocksize)
while chunk:
for hasher in hasherobjs:
hasher.update(chunk)
chunk = f.read(blocksize)
for infotype, hasher in hashers:
result.append(Guess({infotype: hasher.hexdigest()},
confidence=1.0))
except Exception as e:
log.warning('Could not compute hash because: %s' % e)
result = smart_merge(result)
return result
def guess_video_info(filename, info=None, options=None, **kwargs):
return guess_file_info(filename, info=info, options=options, type='video', **kwargs)
def guess_movie_info(filename, info=None, options=None, **kwargs):
return guess_file_info(filename, info=info, options=options, type='movie', **kwargs)
def guess_episode_info(filename, info=None, options=None, **kwargs):
return guess_file_info(filename, info=info, options=options, type='episode', **kwargs)

libs/guessit/__main__.py
View file

@ -1,58 +1,48 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
"""
Entry point module
"""
# pragma: no cover
from __future__ import print_function
from __future__ import absolute_import, division, print_function, unicode_literals
from collections import defaultdict
import json
import logging
import os
import sys
from guessit import PY2, u, guess_file_info, __version__
from guessit.options import get_opts
import six
from guessit import api
from guessit.__version__ import __version__
from guessit.jsonutils import GuessitEncoder
from guessit.options import argument_parser
from rebulk.__version__ import __version__ as __rebulk_version__
def guess_file(filename, info='filename', options=None, **kwargs):
options = options or {}
filename = u(filename)
if not options.get('yaml') and not options.get('show_property'):
def guess_filename(filename, options):
"""
Guess a single filename using given options
"""
if not options.yaml and not options.json and not options.show_property:
print('For:', filename)
guess = guess_file_info(filename, info, options, **kwargs)
if not options.get('unidentified'):
try:
del guess['unidentified']
except KeyError:
pass
cmd_options = vars(options)
cmd_options['implicit'] = True # Force implicit option in CLI
if options.get('show_property'):
print(guess.get(options.get('show_property'), ''))
guess = api.guessit(filename, vars(options))
if options.show_property:
print(guess.get(options.show_property, ''))
return
if options.get('yaml'):
if options.json:
print(json.dumps(guess, cls=GuessitEncoder, ensure_ascii=False))
elif options.yaml:
import yaml
for k, v in guess.items():
if isinstance(v, list) and len(v) == 1:
guess[k] = v[0]
ystr = yaml.safe_dump({filename: dict(guess)}, default_flow_style=False)
from guessit import yamlutils
ystr = yaml.dump({filename: dict(guess)}, Dumper=yamlutils.CustomDumper, default_flow_style=False,
allow_unicode=True)
i = 0
for yline in ystr.splitlines():
if i == 0:
@ -62,222 +52,108 @@ def guess_file(filename, info='filename', options=None, **kwargs):
else:
print(yline)
i += 1
return
print('GuessIt found:', guess.nice_string(options.get('advanced')))
def _supported_properties():
all_properties = defaultdict(list)
transformers_properties = []
from guessit.plugins import transformers
for transformer in transformers.all_transformers():
supported_properties = transformer.supported_properties()
transformers_properties.append((transformer, supported_properties))
if isinstance(supported_properties, dict):
for property_name, possible_values in supported_properties.items():
all_properties[property_name].extend(possible_values)
else:
for property_name in supported_properties:
all_properties[property_name] # just make sure it exists
return all_properties, transformers_properties
def display_transformers():
print('GuessIt transformers:')
_, transformers_properties = _supported_properties()
for transformer, _ in transformers_properties:
print('[@] %s (%s)' % (transformer.name, transformer.priority))
else:
print('GuessIt found:', json.dumps(guess, cls=GuessitEncoder, indent=4, ensure_ascii=False))
def display_properties(options):
values = options.values
transformers = options.transformers
name_only = options.name_only
"""
Display properties
"""
properties = api.properties(options)
print('GuessIt properties:')
all_properties, transformers_properties = _supported_properties()
if name_only:
# the 'container' property does not apply when using the --name-only
# option
del all_properties['container']
if transformers:
for transformer, properties_list in transformers_properties:
print('[@] %s (%s)' % (transformer.name, transformer.priority))
for property_name in properties_list:
property_values = all_properties.get(property_name)
print(' [+] %s' % (property_name,))
if property_values and values:
_display_property_values(property_name, indent=4)
else:
properties_list = sorted(all_properties.keys())
for property_name in properties_list:
property_values = all_properties.get(property_name)
print(' [+] %s' % (property_name,))
if property_values and values:
_display_property_values(property_name, indent=4)
def _display_property_values(property_name, indent=2):
all_properties, _ = _supported_properties()
property_values = all_properties.get(property_name)
for property_value in property_values:
print(indent * ' ' + '[!] %s' % (property_value,))
def run_demo(episodes=True, movies=True, options=None):
# NOTE: tests should not be added here but rather in the tests/ folder
# this is just intended as a quick example
if episodes:
testeps = ['Series/Californication/Season 2/Californication.2x05.Vaginatown.HDTV.XviD-0TV.[tvu.org.ru].avi',
'Series/dexter/Dexter.5x02.Hello,.Bandit.ENG.-.sub.FR.HDTV.XviD-AlFleNi-TeaM.[tvu.org.ru].avi',
'Series/Treme/Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.[tvu.org.ru].avi',
'Series/Duckman/Duckman - 101 (01) - 20021107 - I, Duckman.avi',
'Series/Duckman/Duckman - S1E13 Joking The Chicken (unedited).avi',
'Series/Simpsons/The_simpsons_s13e18_-_i_am_furious_yellow.mpg',
'Series/Simpsons/Saison 12 Français/Simpsons,.The.12x08.A.Bas.Le.Sergent.Skinner.FR.[tvu.org.ru].avi',
'Series/Dr._Slump_-_002_DVB-Rip_Catalan_by_kelf.avi',
'Series/Kaamelott/Kaamelott - Livre V - Second Volet - HD 704x396 Xvid 2 pass - Son 5.1 - TntRip by Slurm.avi']
for f in testeps:
print('-' * 80)
guess_file(f, options=options, type='episode')
if movies:
testmovies = ['Movies/Fear and Loathing in Las Vegas (1998)/Fear.and.Loathing.in.Las.Vegas.720p.HDDVD.DTS.x264-ESiR.mkv',
'Movies/El Dia de la Bestia (1995)/El.dia.de.la.bestia.DVDrip.Spanish.DivX.by.Artik[SEDG].avi',
'Movies/Blade Runner (1982)/Blade.Runner.(1982).(Director\'s.Cut).CD1.DVDRip.XviD.AC3-WAF.avi',
'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv',
'Movies/Sin City (BluRay) (2005)/Sin.City.2005.BDRip.720p.x264.AC3-SEPTiC.mkv',
'Movies/Borat (2006)/Borat.(2006).R5.PROPER.REPACK.DVDRip.XviD-PUKKA.avi',
'[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv',
'Battle Royale (2000)/Battle.Royale.(Batoru.Rowaiaru).(2000).(Special.Edition).CD1of2.DVDRiP.XviD-[ZeaL].avi',
'Movies/Brazil (1985)/Brazil_Criterion_Edition_(1985).CD2.English.srt',
'Movies/Persepolis (2007)/[XCT] Persepolis [H264+Aac-128(Fr-Eng)+ST(Fr-Eng)+Ind].mkv',
'Movies/Toy Story (1995)/Toy Story [HDTV 720p English-Spanish].mkv',
'Movies/Pirates of the Caribbean: The Curse of the Black Pearl (2003)/Pirates.Of.The.Carribean.DC.2003.iNT.DVDRip.XviD.AC3-NDRT.CD1.avi',
'Movies/Office Space (1999)/Office.Space.[Dual-DVDRip].[Spanish-English].[XviD-AC3-AC3].[by.Oswald].avi',
'Movies/The NeverEnding Story (1984)/The.NeverEnding.Story.1.1984.DVDRip.AC3.Xvid-Monteque.avi',
'Movies/Juno (2007)/Juno KLAXXON.avi',
'Movies/Chat noir, chat blanc (1998)/Chat noir, Chat blanc - Emir Kusturica (VO - VF - sub FR - Chapters).mkv',
'Movies/Wild Zero (2000)/Wild.Zero.DVDivX-EPiC.srt',
'Movies/El Bosque Animado (1987)/El.Bosque.Animado.[Jose.Luis.Cuerda.1987].[Xvid-Dvdrip-720x432].avi',
'testsmewt_bugs/movies/Baraka_Edition_Collector.avi'
]
for f in testmovies:
print('-' * 80)
guess_file(f, options=options, type='movie')
def submit_bug(filename, options):
import requests # only import when needed
from requests.exceptions import RequestException
try:
opts = dict((k, v) for k, v in options.__dict__.items()
if v and k != 'submit_bug')
r = requests.post('http://localhost:5000/bugs', {'filename': filename,
'version': __version__,
'options': str(opts)})
if r.status_code == 200:
print('Successfully submitted file: %s' % r.text)
if options.json:
if options.values:
print(json.dumps(properties, cls=GuessitEncoder, ensure_ascii=False))
else:
print('Could not submit bug at the moment, please try again later.')
print(json.dumps(list(properties.keys()), cls=GuessitEncoder, ensure_ascii=False))
elif options.yaml:
import yaml
from guessit import yamlutils
if options.values:
print(yaml.dump(properties, Dumper=yamlutils.CustomDumper, default_flow_style=False, allow_unicode=True))
else:
print(yaml.dump(list(properties.keys()), Dumper=yamlutils.CustomDumper, default_flow_style=False,
allow_unicode=True))
else:
print('GuessIt properties:')
except RequestException as e:
print('Could not submit bug at the moment, please try again later.')
properties_list = list(sorted(properties.keys()))
for property_name in properties_list:
property_values = properties.get(property_name)
print(2 * ' ' + '[+] %s' % (property_name,))
if property_values and options.values:
for property_value in property_values:
print(4 * ' ' + '[!] %s' % (property_value,))
def main(args=None, setup_logging=True):
if setup_logging:
from guessit import slogging
slogging.setup_logging()
if PY2: # pragma: no cover
import codecs
import locale
import sys
def main(args=None): # pylint:disable=too-many-branches
"""
Main function for entry point
"""
if six.PY2 and os.name == 'nt': # pragma: no cover
# see http://bugs.python.org/issue2128
if os.name == 'nt':
for i, a in enumerate(sys.argv):
sys.argv[i] = a.decode(locale.getpreferredencoding())
import locale
# see https://github.com/wackou/guessit/issues/43
# and http://stackoverflow.com/questions/4545661/unicodedecodeerror-when-redirecting-to-file
# Wrap sys.stdout into a StreamWriter to allow writing unicode.
sys.stdout = codecs.getwriter(locale.getpreferredencoding())(sys.stdout)
for i, j in enumerate(sys.argv):
sys.argv[i] = j.decode(locale.getpreferredencoding())
from guessit.plugins import transformers
if args:
options = get_opts().parse_args(args)
else: # pragma: no cover
options = get_opts().parse_args()
if args is None: # pragma: no cover
options = argument_parser.parse_args()
else:
options = argument_parser.parse_args(args)
if options.verbose:
logging.basicConfig(stream=sys.stdout, format='%(message)s')
logging.getLogger().setLevel(logging.DEBUG)
help_required = True
if options.properties or options.values:
display_properties(options)
help_required = False
elif options.transformers:
display_transformers()
help_required = False
if options.demo:
run_demo(episodes=True, movies=True, options=vars(options))
help_required = False
if options.version:
print('+-------------------------------------------------------+')
print('+ GuessIt ' + __version__ + (28-len(__version__)) * ' ' + '+')
print('+ GuessIt ' + __version__ + (28 - len(__version__)) * ' ' + '+')
print('+-------------------------------------------------------+')
print('+ Rebulk ' + __rebulk_version__ + (29 - len(__rebulk_version__)) * ' ' + '+')
print('+-------------------------------------------------------+')
print('| Please report any bug or feature request at |')
print('| https://github.com/wackou/guessit/issues. |')
print('| https://github.com/guessit-io/guessit/issues. |')
print('+-------------------------------------------------------+')
help_required = False
if options.yaml:
try:
import yaml, babelfish
def default_representer(dumper, data):
return dumper.represent_str(str(data))
yaml.SafeDumper.add_representer(babelfish.Language, default_representer)
yaml.SafeDumper.add_representer(babelfish.Country, default_representer)
import yaml # pylint:disable=unused-variable
except ImportError: # pragma: no cover
print('PyYAML not found. Using default output.')
options.yaml = False
print('PyYAML is not installed. \'--yaml\' option will be ignored ...', file=sys.stderr)
if options.properties or options.values:
display_properties(options)
help_required = False
filenames = []
if options.filename:
filenames.extend(options.filename)
for filename in options.filename:
filenames.append(filename)
if options.input_file:
input_file = open(options.input_file, 'r')
if six.PY2:
input_file = open(options.input_file, 'r')
else:
input_file = open(options.input_file, 'r', encoding='utf-8')
try:
filenames.extend([line.strip() for line in input_file.readlines()])
finally:
input_file.close()
filenames = filter(lambda f: f, filenames)
filenames = list(filter(lambda f: f, filenames))
if filenames:
help_required = False
if options.submit_bug:
for filename in filenames:
submit_bug(filename, options)
else:
for filename in filenames:
guess_file(filename,
info=options.info.split(','),
options=vars(options))
for filename in filenames:
help_required = False
guess_filename(filename, options)
if help_required: # pragma: no cover
get_opts().print_help()
argument_parser.print_help()
if __name__ == '__main__':
if __name__ == '__main__': # pragma: no cover
main()

View file

@ -1,20 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
__version__ = '0.10.2.dev0'
"""
Version module
"""
# pragma: no cover
__version__ = '2.1.1.dev0'

150
libs/guessit/api.py Normal file
View file

@ -0,0 +1,150 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
API functions that can be used by external software
"""
try:
from collections import OrderedDict
except ImportError: # pragma: no-cover
from ordereddict import OrderedDict # pylint:disable=import-error
import traceback
import six
from rebulk.introspector import introspect
from .rules import rebulk_builder
from .options import parse_options
from .__version__ import __version__
class GuessitException(Exception):
"""
Exception raised when guessit fails to perform a guess because of an internal error.
"""
def __init__(self, string, options):
super(GuessitException, self).__init__("An internal error has occured in guessit.\n"
"===================== Guessit Exception Report =====================\n"
"version=%s\n"
"string=%s\n"
"options=%s\n"
"--------------------------------------------------------------------\n"
"%s"
"--------------------------------------------------------------------\n"
"Please report at "
"https://github.com/guessit-io/guessit/issues.\n"
"====================================================================" %
(__version__, str(string), str(options), traceback.format_exc()))
self.string = string
self.options = options
def guessit(string, options=None):
"""
Retrieves all matches from string as a dict
:param string: the filename or release name
:type string: str
:param options: the options to use when guessing
:type options: str|dict
:return:
:rtype:
"""
return default_api.guessit(string, options)
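# Editor's sketch, not part of this file: minimal use of the guessit()
# helper above, assuming libs/ is on sys.path. The release name is
# illustrative; the exact keys returned depend on the bundled rebulk rules.
from guessit.api import guessit
info = guessit('Treme.1x03.HDTV.XviD-NoTV.avi', {'type': 'episode'})
# e.g. MatchesDict([('title', 'Treme'), ('season', 1), ('episode', 3), ...])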
def properties(options=None):
"""
Retrieves all properties with possible values that can be guessed
:param options:
:type options:
:return:
:rtype:
"""
return default_api.properties(options)
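# Editor's sketch, not part of this file: listing the guessable properties
# exposed by the helper above; output abridged and illustrative.
from guessit.api import properties
for prop_name, prop_values in properties().items():
    print(prop_name, prop_values)  # e.g. screen_size ['1080p', '720p', ...]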
class GuessItApi(object):
"""
An api class that can be configured with custom Rebulk configuration.
"""
def __init__(self, rebulk):
"""
:param rebulk: Rebulk instance to use.
:type rebulk: Rebulk
:return:
:rtype:
"""
self.rebulk = rebulk
@staticmethod
def _fix_option_encoding(value):
if isinstance(value, list):
return [GuessItApi._fix_option_encoding(item) for item in value]
if six.PY2 and isinstance(value, six.text_type):
return value.encode("utf-8")
if six.PY3 and isinstance(value, six.binary_type):
return value.decode('ascii')
return value
def guessit(self, string, options=None):
"""
Retrieves all matches from string as a dict
:param string: the filename or release name
:type string: str
:param options: the options to use when guessing
:type options: str|dict
:return:
:rtype:
"""
try:
options = parse_options(options)
result_decode = False
result_encode = False
fixed_options = {}
for (key, value) in options.items():
key = GuessItApi._fix_option_encoding(key)
value = GuessItApi._fix_option_encoding(value)
fixed_options[key] = value
options = fixed_options
if six.PY2 and isinstance(string, six.text_type):
string = string.encode("utf-8")
result_decode = True
if six.PY3 and isinstance(string, six.binary_type):
string = string.decode('ascii')
result_encode = True
matches = self.rebulk.matches(string, options)
if result_decode:
for match in matches:
if isinstance(match.value, six.binary_type):
match.value = match.value.decode("utf-8")
if result_encode:
for match in matches:
if isinstance(match.value, six.text_type):
match.value = match.value.encode("ascii")
return matches.to_dict(options.get('advanced', False), options.get('implicit', False))
except:
raise GuessitException(string, options)
def properties(self, options=None):
"""
Grab properties and values that can be generated.
:param options:
:type options:
:return:
:rtype:
"""
unordered = introspect(self.rebulk, options).properties
ordered = OrderedDict()
for k in sorted(unordered.keys(), key=six.text_type):
ordered[k] = list(sorted(unordered[k], key=six.text_type))
if hasattr(self.rebulk, 'customize_properties'):
ordered = self.rebulk.customize_properties(ordered)
return ordered
default_api = GuessItApi(rebulk_builder())
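# Editor's sketch, not part of this file: default_api above is simply a
# GuessItApi built from the default rules, so a second instance with the
# same rules can be constructed the same way.
custom_api = GuessItApi(rebulk_builder())
result = custom_api.guessit('Dark.City.1998.DC.BDRip.720p.DTS.X264-CHD.mkv')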

27
libs/guessit/backports.py Normal file
View file

@ -0,0 +1,27 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Backports
"""
# pragma: no-cover
# pylint: disabled
def cmp_to_key(mycmp):
"""functools.cmp_to_key backport"""
class KeyClass(object):
"""Key class"""
def __init__(self, obj, *args): # pylint: disable=unused-argument
self.obj = obj
def __lt__(self, other):
return mycmp(self.obj, other.obj) < 0
def __gt__(self, other):
return mycmp(self.obj, other.obj) > 0
def __eq__(self, other):
return mycmp(self.obj, other.obj) == 0
def __le__(self, other):
return mycmp(self.obj, other.obj) <= 0
def __ge__(self, other):
return mycmp(self.obj, other.obj) >= 0
def __ne__(self, other):
return mycmp(self.obj, other.obj) != 0
return KeyClass
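# Editor's sketch, not part of this file: the backport above mirrors
# functools.cmp_to_key, turning an old-style comparison function into a
# sort key. _numeric_cmp is a hypothetical example comparator.
def _numeric_cmp(a, b):
    return (a > b) - (a < b)

print(sorted([3, 1, 2], key=cmp_to_key(_numeric_cmp)))  # [1, 2, 3]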

View file

@ -1,771 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from .patterns import compile_pattern, sep
from . import base_text_type
from .guess import Guess
import types
def _get_span(prop, match):
"""Retrieves span for a match"""
if not prop.global_span and match.re.groups:
start = None
end = None
for i in range(1, match.re.groups + 1):
span = match.span(i)
if start is None or span[0] < start:
start = span[0]
if end is None or span[1] > end:
end = span[1]
return start, end
else:
return match.span()
def _trim_span(span, value, blanks = sep):
start, end = span
for i in range(0, len(value)):
if value[i] in blanks:
start += 1
else:
break
for i in reversed(range(0, len(value))):
if value[i] in blanks:
end -= 1
else:
break
if end <= start:
return -1, -1
return start, end
def _get_groups(compiled_re):
"""
Retrieves groups from re
:return: list of group names
"""
if compiled_re.groups:
indexgroup = {}
for k, i in compiled_re.groupindex.items():
indexgroup[i] = k
ret = []
for i in range(1, compiled_re.groups + 1):
ret.append(indexgroup.get(i, i))
return ret
else:
return [None]
class NoValidator(object):
def validate(self, prop, string, node, match, entry_start, entry_end):
return True
class LeftValidator(object):
"""Make sure our match is starting by separator, or by another entry"""
def validate(self, prop, string, node, match, entry_start, entry_end):
span = _get_span(prop, match)
span = _trim_span(span, string[span[0]:span[1]])
start, end = span
sep_start = start <= 0 or string[start - 1] in sep
start_by_other = start in entry_end
if not sep_start and not start_by_other:
return False
return True
class RightValidator(object):
"""Make sure our match is ended by separator, or by another entry"""
def validate(self, prop, string, node, match, entry_start, entry_end):
span = _get_span(prop, match)
span = _trim_span(span, string[span[0]:span[1]])
start, end = span
sep_end = end >= len(string) or string[end] in sep
end_by_other = end in entry_start
if not sep_end and not end_by_other:
return False
return True
class ChainedValidator(object):
def __init__(self, *validators):
self._validators = validators
def validate(self, prop, string, node, match, entry_start, entry_end):
for validator in self._validators:
if not validator.validate(prop, string, node, match, entry_start, entry_end):
return False
return True
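# Editor's note, not part of this file: all validators above share the
# validate(prop, string, node, match, entry_start, entry_end) interface, so
# they compose. Chaining LeftValidator and RightValidator accepts a match
# only when both ends are bounded, much like the DefaultValidator below.
both_ends_bounded = ChainedValidator(LeftValidator(), RightValidator())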
class SameKeyValidator(object):
def __init__(self, validator_function):
self.validator_function = validator_function
def validate(self, prop, string, node, match, entry_start, entry_end):
for key in prop.keys:
for same_value_leaf in node.root.leaves_containing(key):
ret = self.validator_function(same_value_leaf, key, prop, string, node, match, entry_start, entry_end)
if ret is not None:
return ret
return True
class OnlyOneValidator(SameKeyValidator):
def __init__(self):
super(OnlyOneValidator, self).__init__(lambda same_value_leaf, key, prop, string, node, match, entry_start, entry_end: False)
class DefaultValidator(object):
"""Make sure our match is surrounded by separators, or by another entry"""
def validate(self, prop, string, node, match, entry_start, entry_end):
span = _get_span(prop, match)
span = _trim_span(span, string[span[0]:span[1]])
start, end = span
sep_start = start <= 0 or string[start - 1] in sep
sep_end = end >= len(string) or string[end] in sep
start_by_other = start in entry_end
end_by_other = end in entry_start
if (sep_start or start_by_other) and (sep_end or end_by_other):
return True
return False
class FunctionValidator(object):
def __init__(self, function):
self.function = function
def validate(self, prop, string, node, match, entry_start, entry_end):
return self.function(prop, string, node, match, entry_start, entry_end)
class FormatterValidator(object):
def __init__(self, group_name=None, formatted_validator=None):
self.group_name = group_name
self.formatted_validator = formatted_validator
def validate(self, prop, string, node, match, entry_start, entry_end):
if self.group_name:
formatted = prop.format(match.group(self.group_name), self.group_name)
else:
formatted = prop.format(match.group())
if self.formatted_validator:
return self.formatted_validator(formatted)
else:
return formatted
def _get_positions(prop, string, node, match, entry_start, entry_end):
span = match.span()
start = span[0]
end = span[1]
at_start = True
at_end = True
while start > 0:
start -= 1
if string[start] not in sep:
at_start = False
break
while end < len(string) - 1:
end += 1
if string[end] not in sep:
at_end = False
break
return at_start, at_end
class WeakValidator(DefaultValidator):
"""Make sure our match is surrounded by separators and is the first or last element in the string"""
def validate(self, prop, string, node, match, entry_start, entry_end):
if super(WeakValidator, self).validate(prop, string, node, match, entry_start, entry_end):
at_start, at_end = _get_positions(prop, string, node, match, entry_start, entry_end)
return at_start or at_end
return False
class NeighborValidator(DefaultValidator):
"""Make sure the node is next another one"""
def validate(self, prop, string, node, match, entry_start, entry_end):
at_start, at_end = _get_positions(prop, string, node, match, entry_start, entry_end)
if at_start:
previous_leaf = node.root.previous_leaf(node)
if previous_leaf is not None:
return True
if at_end:
next_leaf = node.root.next_leaf(node)
if next_leaf is not None:
return True
return False
class LeavesValidator(DefaultValidator):
"""Make sure our match is surrounded by separators and validates defined lambdas"""
def __init__(self, lambdas=None, previous_lambdas=None, next_lambdas=None, both_side=False, default_=True):
self.previous_lambdas = previous_lambdas if previous_lambdas is not None else []
self.next_lambdas = next_lambdas if next_lambdas is not None else []
if lambdas:
self.previous_lambdas.extend(lambdas)
self.next_lambdas.extend(lambdas)
self.both_side = both_side
self.default_ = default_
def validate(self, prop, string, node, match, entry_start, entry_end):
if self.default_:
super_ret = super(LeavesValidator, self).validate(prop, string, node, match, entry_start, entry_end)
else:
super_ret = True
if not super_ret:
return False
previous_ = self._validate_previous(prop, string, node, match, entry_start, entry_end)
next_ = self._validate_next(prop, string, node, match, entry_start, entry_end)
if previous_ is None and next_ is None:
return super_ret
if self.both_side:
return previous_ and next_
else:
return previous_ or next_
def _validate_previous(self, prop, string, node, match, entry_start, entry_end):
if self.previous_lambdas:
for leaf in node.root.previous_leaves(node):
for lambda_ in self.previous_lambdas:
ret = self._check_rule(lambda_, leaf)
if ret is not None:
return ret
return False
def _validate_next(self, prop, string, node, match, entry_start, entry_end):
if self.next_lambdas:
for leaf in node.root.next_leaves(node):
for lambda_ in self.next_lambdas:
ret = self._check_rule(lambda_, leaf)
if ret is not None:
return ret
return False
def _check_rule(self, lambda_, previous_leaf):
return lambda_(previous_leaf)
class _Property:
"""Represents a property configuration."""
def __init__(self, keys=None, pattern=None, canonical_form=None, canonical_from_pattern=True, confidence=1.0, enhance=True, global_span=False, validator=DefaultValidator(), formatter=None, disabler=None, confidence_lambda=None):
"""
:param keys: Keys of the property (format, screenSize, ...)
:type keys: string or list of strings
:param canonical_form: Unique value of the property (DVD, 720p, ...)
:type canonical_form: string
:param pattern: Regexp pattern
:type pattern: string
:param confidence: confidence
:type confidence: float
:param enhance: enhance the pattern
:type enhance: boolean
:param global_span: if True, the whole match span will used to create the Guess.
Else, the span from the capturing groups will be used.
:type global_span: boolean
:param validator: Validator to use
:type validator: :class:`DefaultValidator`
:param formatter: Formatter to use
:type formatter: function
"""
if isinstance(keys, list):
self.keys = keys
elif isinstance(keys, base_text_type):
self.keys = [keys]
else:
self.keys = []
self.canonical_form = canonical_form
if pattern is not None:
self.pattern = pattern
else:
self.pattern = canonical_form
if self.canonical_form is None and canonical_from_pattern:
self.canonical_form = self.pattern
self.compiled = compile_pattern(self.pattern, enhance=enhance)
for group_name in _get_groups(self.compiled):
if isinstance(group_name, base_text_type) and group_name not in self.keys:
self.keys.append(group_name)
if not self.keys:
raise ValueError("No property key is defined")
self.confidence = confidence
self.confidence_lambda = confidence_lambda
self.global_span = global_span
self.validator = validator
self.formatter = formatter
self.disabler = disabler
def disabled(self, options):
if self.disabler:
return self.disabler(options)
return False
def format(self, value, group_name=None):
"""Retrieves the final value from re group match value"""
formatter = None
if isinstance(self.formatter, dict):
formatter = self.formatter.get(group_name)
if formatter is None and group_name is not None:
formatter = self.formatter.get(None)
else:
formatter = self.formatter
if isinstance(formatter, types.FunctionType):
return formatter(value)
elif formatter is not None:
return formatter.format(value)
return value
def __repr__(self):
return "%s: %s" % (self.keys, self.canonical_form if self.canonical_form else self.pattern)
class PropertiesContainer(object):
def __init__(self, **kwargs):
self._properties = []
self.default_property_kwargs = kwargs
def unregister_property(self, name, *canonical_forms):
"""Unregister a property's canonical forms.
If canonical_forms are specified, only those values will be unregistered
:param name: Property name to unregister
:type name: string
:param canonical_forms: Values to unregister
:type canonical_forms: varargs of string
"""
# _Property stores its names in 'keys' (there is no 'name' attribute), and
# the filtered list must be assigned back for the removal to take effect.
self._properties = [prop for prop in self._properties
if name not in prop.keys or
(canonical_forms and prop.canonical_form not in canonical_forms)]
def register_property(self, name, *patterns, **property_params):
"""Register property with defined canonical form and patterns.
:param name: name of the property (format, screenSize, ...)
:type name: string
:param patterns: regular expression patterns to register for the property canonical_form
:type patterns: varargs of string
"""
properties = []
for pattern in patterns:
params = dict(self.default_property_kwargs)
params.update(property_params)
if isinstance(pattern, dict):
params.update(pattern)
prop = _Property(name, **params)
else:
prop = _Property(name, pattern, **params)
self._properties.append(prop)
properties.append(prop)
return properties
def register_canonical_properties(self, name, *canonical_forms, **property_params):
"""Register properties from their canonical forms.
:param name: name of the property (releaseGroup, ...)
:type name: string
:param canonical_forms: values of the property ('ESiR', 'WAF', 'SEPTiC', ...)
:type canonical_forms: varargs of strings
"""
properties = []
for canonical_form in canonical_forms:
params = dict(property_params)
params['canonical_form'] = canonical_form
properties.extend(self.register_property(name, canonical_form, **params))
return properties
def unregister_all_properties(self):
"""Unregister all defined properties"""
del self._properties[:]  # list.clear() does not exist on Python 2
def find_properties(self, string, node, options, name=None, validate=True, re_match=False, sort=True, multiple=False):
"""Find all distinct properties for given string
If no capturing group is defined in the property, value will be grabbed from the entire match.
If one ore more unnamed capturing group is defined in the property, first capturing group will be used.
If named capturing group are defined in the property, they will be returned as property key.
If validate, found properties will be validated by their defined validator
If re_match, re.match will be used instead of re.search.
if sort, found properties will be sorted from longer match to shorter match.
If multiple is False and multiple values are found for the same property, the more confident one will be returned.
If multiple is False and multiple values are found for the same property and the same confidence, the longer will be returned.
:param string: input string
:type string: string
:param node: current node of the matching tree
:type node: :class:`guessit.matchtree.MatchTree`
:param name: name of property to find
:type name: string
:param re_match: use re.match instead of re.search
:type re_match: bool
:param multiple: Allows multiple property values to be returned
:type multiple: bool
:return: found properties
:rtype: list of tuples (:class:`_Property`, match, list of tuples (property_name, tuple(value_start, value_end)))
:see: `_Property`
:see: `register_property`
:see: `register_canonical_properties`
"""
entry_start = {}
entry_end = {}
entries = []
duplicate_matches = {}
ret = []
if not string.strip():
return ret
# search all properties
for prop in self.get_properties(name):
if not prop.disabled(options):
valid_match = None
if re_match:
match = prop.compiled.match(string)
if match:
entries.append((prop, match))
else:
matches = list(prop.compiled.finditer(string))
duplicate_matches[prop] = matches
for match in matches:
entries.append((prop, match))
for prop, match in entries:
# compute confidence
if prop.confidence_lambda:
computed_confidence = prop.confidence_lambda(match)
if computed_confidence is not None:
prop.confidence = computed_confidence
if validate:
# compute entries start and ends
for prop, match in entries:
start, end = _get_span(prop, match)
if start not in entry_start:
entry_start[start] = [prop]
else:
entry_start[start].append(prop)
if end not in entry_end:
entry_end[end] = [prop]
else:
entry_end[end].append(prop)
# remove invalid values
while True:
invalid_entries = []
for entry in entries:
prop, match = entry
if not prop.validator.validate(prop, string, node, match, entry_start, entry_end):
invalid_entries.append(entry)
if not invalid_entries:
break
for entry in invalid_entries:
prop, match = entry
entries.remove(entry)
prop_duplicate_matches = duplicate_matches.get(prop)
if prop_duplicate_matches:
prop_duplicate_matches.remove(match)
invalid_span = _get_span(prop, match)
start = invalid_span[0]
end = invalid_span[1]
entry_start[start].remove(prop)
if not entry_start.get(start):
del entry_start[start]
entry_end[end].remove(prop)
if not entry_end.get(end):
del entry_end[end]
for prop, prop_duplicate_matches in duplicate_matches.items():
# Keeping the last valid match.
# Needed for the.100.109.hdtv-lol.mp4
for duplicate_match in prop_duplicate_matches[:-1]:
entries.remove((prop, duplicate_match))
if multiple:
ret = entries
else:
# keep only the best match if multiple values were found
entries_dict = {}
for entry in entries:
for key in prop.keys:
if key not in entries_dict:
entries_dict[key] = []
entries_dict[key].append(entry)
for key_entries in entries_dict.values():
if multiple:
for entry in key_entries:
ret.append(entry)
else:
best_ret = {}
best_prop, best_match = None, None
if len(key_entries) == 1:
best_prop, best_match = key_entries[0]
else:
for prop, match in key_entries:
start, end = _get_span(prop, match)
# prefer the higher-confidence entry; on ties, prefer the longer match
if not best_prop or \
best_prop.confidence < prop.confidence or \
best_prop.confidence == prop.confidence and \
best_match.span()[1] - best_match.span()[0] < match.span()[1] - match.span()[0]:
best_prop, best_match = prop, match
best_ret[best_prop] = best_match
for prop, match in best_ret.items():
ret.append((prop, match))
if sort:
def _sorting(x):
_, x_match = x
x_start, x_end = x_match.span()
return x_start - x_end
ret.sort(key=_sorting)
return ret
def as_guess(self, found_properties, input=None, filter_=None, sep_replacement=None, multiple=False, *args, **kwargs):
if filter_ is None:
filter_ = lambda property, *args, **kwargs: True
guesses = [] if multiple else None
for prop, match in found_properties:
first_key = None
for key in prop.keys:
# First property key will be used as base for effective name
if isinstance(key, base_text_type):
if first_key is None:
first_key = key
break
property_name = first_key if first_key else None
span = _get_span(prop, match)
guess = Guess(confidence=prop.confidence, input=input, span=span, prop=property_name)
groups = _get_groups(match.re)
for group_name in groups:
name = group_name if isinstance(group_name, base_text_type) else property_name if property_name not in groups else None
if name:
value = self._effective_prop_value(prop, group_name, input, match.span(group_name) if group_name else match.span(), sep_replacement)
if value is not None:
is_string = isinstance(value, base_text_type)
if not is_string or is_string and value: # Keep non-empty strings and other defined objects
if isinstance(value, dict):
for k, v in value.items():
if k is None:
k = name
guess[k] = v
else:
if name in guess:
if not isinstance(guess[name], list):
guess[name] = [guess[name]]
guess[name].append(value)
else:
guess[name] = value
if group_name:
guess.metadata(prop).span = match.span(group_name)
if filter_(guess):
if multiple:
guesses.append(guess)
else:
return guess
return guesses
def _effective_prop_value(self, prop, group_name, input=None, span=None, sep_replacement=None):
if prop.canonical_form:
return prop.canonical_form
if input is None:
return None
value = input
if span is not None:
value = value[span[0]:span[1]]
if sep_replacement:
for sep_char in sep:
value = value.replace(sep_char, sep_replacement)
if value:
value = prop.format(value, group_name)
return value
def get_properties(self, name=None, canonical_form=None):
"""Retrieve properties
:return: Properties
:rtype: generator
"""
for prop in self._properties:
if (name is None or name in prop.keys) and (canonical_form is None or prop.canonical_form == canonical_form):
yield prop
def get_supported_properties(self):
supported_properties = {}
for prop in self.get_properties():
for k in prop.keys:
values = supported_properties.get(k)
if not values:
values = set()
supported_properties[k] = values
if prop.canonical_form:
values.add(prop.canonical_form)
return supported_properties
class QualitiesContainer():
def __init__(self):
self._qualities = {}
def register_quality(self, name, canonical_form, rating):
"""Register a quality rating.
:param name: Name of the property
:type name: string
:param canonical_form: Value of the property
:type canonical_form: string
:param rating: Estimated quality rating for the property
:type rating: int
"""
property_qualities = self._qualities.get(name)
if property_qualities is None:
property_qualities = {}
self._qualities[name] = property_qualities
property_qualities[canonical_form] = rating
def unregister_quality(self, name, *canonical_forms):
"""Unregister quality ratings for given property name.
If canonical_forms are specified, only those values will be unregistered
:param name: Name of the property
:type name: string
:param canonical_forms: Value of the property
:type canonical_forms: string
"""
if not canonical_forms:
if name in self._qualities:
del self._qualities[name]
else:
property_qualities = self._qualities.get(name)
if property_qualities is not None:
for property_canonical_form in canonical_forms:
if property_canonical_form in property_qualities:
del property_qualities[property_canonical_form]
if not property_qualities:
del self._qualities[name]
def clear_qualities(self):
"""Unregister all defined quality ratings.
"""
self._qualities.clear()
def rate_quality(self, guess, *props):
"""Rate the quality of guess.
:param guess: Guess to rate
:type guess: :class:`guessit.guess.Guess`
:param props: Properties to include in the rating. If empty, rating will be performed for all guess properties.
:type props: varargs of string
:return: Quality of the guess. The higher, the better.
:rtype: int
"""
rate = 0
if not props:
props = guess.keys()
for prop in props:
prop_value = guess.get(prop)
prop_qualities = self._qualities.get(prop)
if prop_value is not None and prop_qualities is not None:
rate += prop_qualities.get(prop_value, 0)
return rate
def best_quality_properties(self, props, *guesses):
"""Retrieve the best quality guess, based on given properties
:param props: Properties to include in the rating
:type props: list of strings
:param guesses: Guesses to rate
:type guesses: :class:`guessit.guess.Guess`
:return: Best quality guess from all passed guesses
:rtype: :class:`guessit.guess.Guess`
"""
best_guess = None
best_rate = None
for guess in guesses:
rate = self.rate_quality(guess, *props)
if best_rate is None or best_rate < rate:
best_rate = rate
best_guess = guess
return best_guess
def best_quality(self, *guesses):
"""Retrieve the best quality guess.
:param guesses: Guesses to rate
:type guesses: :class:`guessit.guess.Guess`
:return: Best quality guess from all passed guesses
:rtype: :class:`guessit.guess.Guess`
"""
best_guess = None
best_rate = None
for guess in guesses:
rate = self.rate_quality(guess)
if best_rate is None or best_rate < rate:
best_rate = rate
best_guess = guess
return best_guess
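# Editor's sketch, not part of this file: how the quality API above was
# meant to be used; the 'format' ratings are illustrative.
qualities = QualitiesContainer()
qualities.register_quality('format', 'BluRay', 10)
qualities.register_quality('format', 'DVD', 5)
g1 = Guess({'format': 'DVD'}, confidence=0.8)
g2 = Guess({'format': 'BluRay'}, confidence=0.7)
best = qualities.best_quality(g1, g2)  # g2 wins: BluRay rates higher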

View file

@ -1,129 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
import datetime
import re
from dateutil import parser
_dsep = r'[-/ \.]'
_dsep_bis = r'[-/ \.x]'
date_regexps = [
re.compile('[^\d](\d{8})[^\d]', re.IGNORECASE),
re.compile('[^\d](\d{6})[^\d]', re.IGNORECASE),
re.compile('[^\d](\d{2})%s(\d{1,2})%s(\d{1,2})[^\d]' % (_dsep, _dsep), re.IGNORECASE),
re.compile('[^\d](\d{1,2})%s(\d{1,2})%s(\d{2})[^\d]' % (_dsep, _dsep), re.IGNORECASE),
re.compile('[^\d](\d{4})%s(\d{1,2})%s(\d{1,2})[^\d]' % (_dsep_bis, _dsep), re.IGNORECASE),
re.compile('[^\d](\d{1,2})%s(\d{1,2})%s(\d{4})[^\d]' % (_dsep, _dsep_bis), re.IGNORECASE),
re.compile('[^\d](\d{1,2}(?:st|nd|rd|th)?%s(?:[a-z]{3,10})%s\d{4})[^\d]' % (_dsep, _dsep), re.IGNORECASE)]
def valid_year(year, today=None):
"""Check if number is a valid year"""
if not today:
today = datetime.date.today()
return 1920 < year < today.year + 5
def search_year(string):
"""Looks for year patterns, and if found return the year and group span.
Assumes there are sentinels at the beginning and end of the string that
always allow matching a non-digit delimiting the date.
Note this only looks for valid production years, that is between 1920
and now + 5 years, so for instance 2000 would be returned as a valid
year but 1492 would not.
>>> search_year(' in the year 2000... ')
(2000, (13, 17))
>>> search_year(' they arrived in 1492. ')
(None, None)
"""
match = re.search(r'[^0-9]([0-9]{4})[^0-9]', string)
if match:
year = int(match.group(1))
if valid_year(year):
return (year, match.span(1))
return (None, None)
def search_date(string, year_first=None, day_first=True):
"""Looks for date patterns, and if found return the date and group span.
Assumes there are sentinels at the beginning and end of the string that
always allow matching a non-digit delimiting the date.
The year may be given with only two digits; in that case, the nearest
possible date to today is returned.
>>> search_date(' This happened on 2002-04-22. ')
(datetime.date(2002, 4, 22), (18, 28))
>>> search_date(' And this on 17-06-1998. ')
(datetime.date(1998, 6, 17), (13, 23))
>>> search_date(' no date in here ')
(None, None)
"""
start, end = None, None
match = None
for date_re in date_regexps:
s = date_re.search(string)
if s and (match is None or s.end() - s.start() > len(match)):
start, end = s.start(), s.end()
if date_re.groups:
match = '-'.join(s.groups())
else:
match = s.group()
if match is None:
return None, None
today = datetime.date.today()
# If day_first/year_first is undefined, parse is made using both possible values.
yearfirst_opts = [False, True]
if year_first is not None:
yearfirst_opts = [year_first]
dayfirst_opts = [True, False]
if day_first is not None:
dayfirst_opts = [day_first]
kwargs_list = ({'dayfirst': d, 'yearfirst': y} for d in dayfirst_opts for y in yearfirst_opts)
for kwargs in kwargs_list:
try:
date = parser.parse(match, **kwargs)
except (ValueError, TypeError): # see https://bugs.launchpad.net/dateutil/+bug/1247643
date = None
# check date plausibility
if date and valid_year(date.year, today=today):
return date.date(), (start+1, end-1) #compensate for sentinels
return None, None

View file

@ -1,87 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit import s, u
import os.path
import zipfile
import io
def split_path(path):
r"""Splits the given path into the list of folders and the filename (or the
last folder if you gave it a folder path.
If the given path was an absolute path, the first element will always be:
- the '/' root folder on Unix systems
- the drive letter on Windows systems (eg: r'C:\')
- the mount point '\\' on Windows systems (eg: r'\\host\share')
>>> s(split_path('/usr/bin/smewt'))
['/', 'usr', 'bin', 'smewt']
>>> s(split_path('relative_path/to/my_folder/'))
['relative_path', 'to', 'my_folder']
"""
result = []
while True:
head, tail = os.path.split(path)
if not head and not tail:
return result
if not tail and head == path:
# Make sure we won't have an infinite loop.
result = [head] + result
return result
# we just split a directory ending with '/', so tail is empty
if not tail:
path = head
continue
# otherwise, add the last path fragment and keep splitting
result = [tail] + result
path = head
def file_in_same_dir(ref_file, desired_file):
"""Return the path for a file in the same dir as a given reference file.
>>> s(file_in_same_dir('~/smewt/smewt.db', 'smewt.settings')) == os.path.normpath('~/smewt/smewt.settings')
True
"""
return os.path.join(*(split_path(ref_file)[:-1] + [desired_file]))
def load_file_in_same_dir(ref_file, filename):
"""Load a given file. Works even when the file is contained inside a zip."""
path = split_path(ref_file)[:-1] + [filename]
for i, p in enumerate(path):
if p.endswith('.zip'):
zfilename = os.path.join(*path[:i + 1])
zfile = zipfile.ZipFile(zfilename)
return u(zfile.read('/'.join(path[i + 1:])))
return u(io.open(os.path.join(*path), encoding='utf-8').read())

View file

@ -1,514 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit import UnicodeMixin, s, u, base_text_type
from babelfish import Language, Country
import json
import datetime
import logging
log = logging.getLogger(__name__)
class GuessMetadata(object):
"""GuessMetadata contains confidence, an input string, span and related property.
If defined on a property of Guess object, it overrides the object defined as global.
:param parent: The parent metadata, used for undefined properties in self object
:type parent: :class: `GuessMedata`
:param confidence: The confidence (from 0.0 to 1.0)
:type confidence: number
:param input: The input string
:type input: string
:param span: The span of the matched value in the input string
:type span: tuple (int, int)
:param prop: The found property definition
:type prop: :class `guessit.containers._Property`
"""
def __init__(self, parent=None, confidence=None, input=None, span=None, prop=None, *args, **kwargs):
self.parent = parent
if confidence is None and self.parent is None:
self._confidence = 1.0
else:
self._confidence = confidence
self._input = input
self._span = span
self._prop = prop
@property
def confidence(self):
"""The confidence
:rtype: int
:return: confidence value
"""
return self._confidence if self._confidence is not None else self.parent.confidence if self.parent else None
@confidence.setter
def confidence(self, confidence):
self._confidence = confidence
@property
def input(self):
"""The input
:rtype: string
:return: String used to find this guess value
"""
return self._input if self._input is not None else self.parent.input if self.parent else None
@input.setter
def input(self, input):
"""The input
:rtype: string
"""
self._input = input
@property
def span(self):
"""The span
:rtype: tuple (int, int)
:return: span of input string used to find this guess value
"""
return self._span if self._span is not None else self.parent.span if self.parent else None
@span.setter
def span(self, span):
"""The span
:rtype: tuple (int, int)
:return: span of input string used to find this guess value
"""
self._span = span
@property
def prop(self):
"""The property
:rtype: :class:`_Property`
:return: The property
"""
return self._prop if self._prop is not None else self.parent.prop if self.parent else None
@property
def raw(self):
"""Return the raw information (original match from the string,
not the cleaned version) associated with the given property name."""
if self.input and self.span:
return self.input[self.span[0]:self.span[1]]
return None
def __repr__(self, *args, **kwargs):
return object.__repr__(self, *args, **kwargs)
def _split_kwargs(**kwargs):
metadata_args = {}
for prop in dir(GuessMetadata):
try:
metadata_args[prop] = kwargs.pop(prop)
except KeyError:
pass
return metadata_args, kwargs
class Guess(UnicodeMixin, dict):
"""A Guess is a dictionary which has an associated confidence for each of
its values.
As it is a subclass of dict, you can use it everywhere you expect a
simple dict."""
def __init__(self, *args, **kwargs):
metadata_kwargs, kwargs = _split_kwargs(**kwargs)
self._global_metadata = GuessMetadata(**metadata_kwargs)
dict.__init__(self, *args, **kwargs)
self._metadata = {}
for prop in self:
self._metadata[prop] = GuessMetadata(parent=self._global_metadata)
def rename(self, old_name, new_name):
if old_name in self._metadata:
metadata = self._metadata[old_name]
del self._metadata[old_name]
self._metadata[new_name] = metadata
if old_name in self:
value = self[old_name]
del self[old_name]
self[new_name] = value
return True
return False
def to_dict(self, advanced=False):
"""Return the guess as a dict containing only base types, ie:
where dates, languages, countries, etc. are converted to strings.
if advanced is True, return the data as a json string containing
also the raw information of the properties."""
data = dict(self)
for prop, value in data.items():
if isinstance(value, datetime.date):
data[prop] = value.isoformat()
elif isinstance(value, (UnicodeMixin, base_text_type)):
data[prop] = u(value)
elif isinstance(value, (Language, Country)):
data[prop] = value.guessit
elif isinstance(value, list):
data[prop] = [u(x) for x in value]
if advanced:
metadata = self.metadata(prop)
prop_data = {'value': data[prop]}
if metadata.raw:
prop_data['raw'] = metadata.raw
if metadata.confidence:
prop_data['confidence'] = metadata.confidence
data[prop] = prop_data
return data
def nice_string(self, advanced=False):
"""Return a string with the property names and their values,
that also displays the associated confidence to each property.
FIXME: doc with param"""
if advanced:
data = self.to_dict(advanced)
return json.dumps(data, indent=4)
else:
data = self.to_dict()
parts = json.dumps(data, indent=4).split('\n')
for i, p in enumerate(parts):
if p[:5] != ' "':
continue
prop = p.split('"')[1]
parts[i] = (' [%.2f] "' % self.confidence(prop)) + p[5:]
return '\n'.join(parts)
def __unicode__(self):
return u(self.to_dict())
def metadata(self, prop=None):
"""Return the metadata associated with the given property name
If no property name is given, get the global_metadata
"""
if prop is None:
return self._global_metadata
if prop not in self._metadata:
self._metadata[prop] = GuessMetadata(parent=self._global_metadata)
return self._metadata[prop]
def confidence(self, prop=None):
return self.metadata(prop).confidence
def set_confidence(self, prop, confidence):
self.metadata(prop).confidence = confidence
def raw(self, prop):
return self.metadata(prop).raw
def set(self, prop_name, value, *args, **kwargs):
if value is None:
try:
del self[prop_name]
except KeyError:
pass
try:
del self._metadata[prop_name]
except KeyError:
pass
else:
self[prop_name] = value
if 'metadata' in kwargs.keys():
self._metadata[prop_name] = kwargs['metadata']
else:
self._metadata[prop_name] = GuessMetadata(parent=self._global_metadata, *args, **kwargs)
def update(self, other, confidence=None):
dict.update(self, other)
if isinstance(other, Guess):
for prop in other:
try:
self._metadata[prop] = other._metadata[prop]
except KeyError:
pass
if confidence is not None:
for prop in other:
self.set_confidence(prop, confidence)
def update_highest_confidence(self, other):
"""Update this guess with the values from the given one. In case
there is property present in both, only the one with the highest one
is kept."""
if not isinstance(other, Guess):
raise ValueError('Can only call this function on Guess instances')
for prop in other:
if prop in self and self.metadata(prop).confidence >= other.metadata(prop).confidence:
continue
self[prop] = other[prop]
self._metadata[prop] = other.metadata(prop)
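# Editor's sketch, not part of this file: per-property confidence on the
# Guess class above, alongside plain dict behaviour. Values illustrative.
g = Guess({'series': 'Treme'}, confidence=0.8)
g.set('season', 1, confidence=0.5)
print(g.confidence('series'))  # 0.8, inherited from the global metadata
print(g.to_dict())             # {'series': 'Treme', 'season': 1}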
def choose_int(g1, g2):
"""Function used by merge_similar_guesses to choose between 2 possible
properties when they are integers."""
v1, c1 = g1 # value, confidence
v2, c2 = g2
if (v1 == v2):
return (v1, 1 - (1 - c1) * (1 - c2))
else:
if c1 > c2:
return (v1, c1 - c2)
else:
return (v2, c2 - c1)
def choose_string(g1, g2):
"""Function used by merge_similar_guesses to choose between 2 possible
properties when they are strings.
If the 2 strings are similar, or one is contained in the other, the latter is returned
with an increased confidence.
If the 2 strings are dissimilar, the one with the higher confidence is returned, with
a weaker confidence.
Note that here, 'similar' means that 2 strings are either equal, or that they
differ very little, such as one string being the other one with the 'the' word
prepended to it.
>>> s(choose_string(('Hello', 0.75), ('World', 0.5)))
('Hello', 0.25)
>>> s(choose_string(('Hello', 0.5), ('hello', 0.5)))
('Hello', 0.75)
>>> s(choose_string(('Hello', 0.4), ('Hello World', 0.4)))
('Hello', 0.64)
>>> s(choose_string(('simpsons', 0.5), ('The Simpsons', 0.5)))
('The Simpsons', 0.75)
"""
v1, c1 = g1 # value, confidence
v2, c2 = g2
if not v1:
return g2
elif not v2:
return g1
v1, v2 = v1.strip(), v2.strip()
v1l, v2l = v1.lower(), v2.lower()
combined_prob = 1 - (1 - c1) * (1 - c2)
if v1l == v2l:
return v1, combined_prob
# check for common patterns
elif v1l == 'the ' + v2l:
return v1, combined_prob
elif v2l == 'the ' + v1l:
return v2, combined_prob
# if one string is contained in the other, return the shortest one
elif v2l in v1l:
return v2, combined_prob
elif v1l in v2l:
return v1, combined_prob
# in case of conflict, return the one with highest confidence
else:
if c1 > c2:
return v1, c1 - c2
else:
return v2, c2 - c1
def _merge_similar_guesses_nocheck(guesses, prop, choose):
"""Take a list of guesses and merge those which have the same properties,
increasing or decreasing the confidence depending on whether their values
are similar.
This function assumes there are at least 2 valid guesses."""
similar = [guess for guess in guesses if prop in guess]
g1, g2 = similar[0], similar[1]
# merge only this prop of s2 into s1, updating the confidence for the
# considered property
v1, v2 = g1[prop], g2[prop]
c1, c2 = g1.confidence(prop), g2.confidence(prop)
new_value, new_confidence = choose((v1, c1), (v2, c2))
if new_confidence >= c1:
msg = "Updating matching property '%s' with confidence %.2f"
else:
msg = "Updating non-matching property '%s' with confidence %.2f"
log.debug(msg % (prop, new_confidence))
g1.set(prop, new_value, confidence=new_confidence)
g2.pop(prop)
# remove g2 if there are no properties left
if not g2.keys():
guesses.remove(g2)
def merge_similar_guesses(guesses, prop, choose):
"""Take a list of guesses and merge those which have the same properties,
increasing or decreasing the confidence depending on whether their values
are similar."""
similar = [guess for guess in guesses if prop in guess]
if len(similar) < 2:
# nothing to merge
return
if len(similar) == 2:
_merge_similar_guesses_nocheck(guesses, prop, choose)
if len(similar) > 2:
log.debug('complex merge, trying our best...')
before = len(guesses)
_merge_similar_guesses_nocheck(guesses, prop, choose)
after = len(guesses)
if after < before:
# recurse only when the previous call actually did something,
# otherwise we end up in an infinite loop
merge_similar_guesses(guesses, prop, choose)
def merge_all(guesses, append=None):
"""Merge all the guesses in a single result, remove very unlikely values,
and return it.
You can specify a list of properties that should be appended into a list
instead of being merged.
>>> s(merge_all([ Guess({'season': 2}, confidence=0.6),
... Guess({'episodeNumber': 13}, confidence=0.8) ])
... ) == {'season': 2, 'episodeNumber': 13}
True
>>> s(merge_all([ Guess({'episodeNumber': 27}, confidence=0.02),
... Guess({'season': 1}, confidence=0.2) ])
... ) == {'season': 1}
True
>>> s(merge_all([ Guess({'other': 'PROPER'}, confidence=0.8),
... Guess({'releaseGroup': '2HD'}, confidence=0.8) ],
... append=['other'])
... ) == {'releaseGroup': '2HD', 'other': ['PROPER']}
True
"""
result = Guess()
if not guesses:
return result
if append is None:
append = []
for g in guesses:
# first append our appendable properties
for prop in append:
if prop in g:
if isinstance(g[prop], (list, set)):
new_values = result.get(prop, []) + list(g[prop])
else:
new_values = result.get(prop, []) + [g[prop]]
result.set(prop, new_values,
# TODO: what to do with confidence here? maybe an
# arithmetic mean...
confidence=g.metadata(prop).confidence,
input=g.metadata(prop).input,
span=g.metadata(prop).span,
prop=g.metadata(prop).prop)
del g[prop]
# then merge the remaining ones
dups = set(result) & set(g)
if dups:
log.debug('duplicate properties %s in merged result...' % [(result[p], g[p]) for p in dups])
result.update_highest_confidence(g)
# delete very unlikely values
for p in list(result.keys()):
if result.confidence(p) < 0.05:
del result[p]
# make sure our appendable properties contain unique values
for prop in append:
try:
value = result[prop]
if isinstance(value, list):
result[prop] = list(set(value))
else:
result[prop] = [value]
except KeyError:
pass
return result
def smart_merge(guesses):
"""First tries to merge well-known similar properties, and then merges
the rest with a merge_all call.
Should be the function to call in most cases, unless one wants to have more
control.
Warning: this function is destructive, i.e. it will merge the list in-place.
"""
# 1- try to merge similar information together and give it a higher
# confidence
for int_part in ('year', 'season', 'episodeNumber'):
merge_similar_guesses(guesses, int_part, choose_int)
for string_part in ('title', 'series', 'container', 'format',
'releaseGroup', 'website', 'audioCodec',
'videoCodec', 'screenSize', 'episodeFormat',
'audioChannels', 'idNumber'):
merge_similar_guesses(guesses, string_part, choose_string)
# 2- merge the rest, potentially discarding information not properly
# merged before
result = merge_all(guesses,
append=['language', 'subtitleLanguage', 'other',
'episodeDetails', 'unidentified'])
return result
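# Editor's sketch, not part of this file: merging several partial guesses
# with smart_merge(); note the input list is modified in place.
partial = [Guess({'season': 2}, confidence=0.6),
           Guess({'season': 2}, confidence=0.5),
           Guess({'episodeNumber': 13}, confidence=0.8)]
merged = smart_merge(partial)
# merged == {'season': 2, 'episodeNumber': 13}; the duplicate season values
# are combined by choose_int into a single higher-confidence guess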

View file

@ -1,69 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit import s, to_hex
import hashlib
import os.path
from functools import reduce
def hash_file(filename):
"""Returns the ed2k hash of a given file.
>>> testfile = os.path.join(os.path.dirname(__file__), 'test/dummy.srt')
>>> s(hash_file(testfile))
'ed2k://|file|dummy.srt|59|41F58B913AB3973F593BEBA8B8DF6510|/'
"""
return 'ed2k://|file|%s|%d|%s|/' % (os.path.basename(filename),
os.path.getsize(filename),
hash_filehash(filename).upper())
def hash_filehash(filename):
"""Returns the ed2k hash of a given file.
This function is taken from:
http://www.radicand.org/blog/orz/2010/2/21/edonkey2000-hash-in-python/
"""
md4 = hashlib.new('md4').copy
def gen(f):
while True:
x = f.read(9728000)
if x:
yield x
else:
return
def md4_hash(data):
m = md4()
m.update(data)
return m
with open(filename, 'rb') as f:
a = gen(f)
hashes = [md4_hash(data).digest() for data in a]
if len(hashes) == 1:
return to_hex(hashes[0])
else:
            return md4_hash(reduce(lambda a, d: a + d, hashes, b'')).hexdigest()
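# Sanity-check sketch (hypothetical relative path; requires an MD4-enabled
# hashlib build): a file smaller than one 9728000-byte chunk hashes to the
# plain MD4 of its content.
import hashlib
with open('test/dummy.srt', 'rb') as f:
    data = f.read()
assert hash_filehash('test/dummy.srt') == hashlib.new('md4', data).hexdigest()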

View file

@ -1,58 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
import struct
import os
def hash_file(filename):
"""This function is taken from:
http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes
and is licensed under the GPL."""
longlongformat = b'q' # long long
bytesize = struct.calcsize(longlongformat)
f = open(filename, "rb")
filesize = os.path.getsize(filename)
hash_value = filesize
if filesize < 65536 * 2:
raise Exception("SizeError: size is %d, should be > 132K..." % filesize)
for x in range(int(65536 / bytesize)):
buf = f.read(bytesize)
(l_value,) = struct.unpack(longlongformat, buf)
hash_value += l_value
hash_value &= 0xFFFFFFFFFFFFFFFF # to remain as 64bit number
f.seek(max(0, filesize - 65536), 0)
for x in range(int(65536 / bytesize)):
buf = f.read(bytesize)
(l_value,) = struct.unpack(longlongformat, buf)
hash_value += l_value
hash_value &= 0xFFFFFFFFFFFFFFFF
f.close()
return "%016x" % hash_value

32
libs/guessit/jsonutils.py Normal file
View file

@ -0,0 +1,32 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
JSON Utils
"""
import json
try:
from collections import OrderedDict
except ImportError: # pragma: no-cover
from ordereddict import OrderedDict # pylint:disable=import-error
from rebulk.match import Match
class GuessitEncoder(json.JSONEncoder):
"""
JSON Encoder for guessit response
"""
def default(self, o): # pylint:disable=method-hidden
if isinstance(o, Match):
ret = OrderedDict()
ret['value'] = o.value
if o.raw:
ret['raw'] = o.raw
ret['start'] = o.start
ret['end'] = o.end
return ret
elif hasattr(o, 'name'): # Babelfish languages/countries long name
return str(o.name)
else: # pragma: no cover
return str(o)
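# Usage sketch (guessit 2 API; release name is illustrative): the encoder
# serializes Match objects and babelfish Language/Country values that a plain
# json.dumps would reject.
import json
from guessit import guessit
from guessit.jsonutils import GuessitEncoder
matches = guessit('The.Prestige.2006.720p.BluRay.x264-GRP.mkv')
print(json.dumps(matches, cls=GuessitEncoder, ensure_ascii=False, indent=2))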

View file

@ -1,311 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit import UnicodeMixin, base_text_type, u
from guessit.textutils import find_words
from babelfish import Language, Country
import babelfish
import re
import logging
from guessit.guess import Guess
__all__ = ['Language', 'UNDETERMINED',
'search_language', 'guess_language']
log = logging.getLogger(__name__)
UNDETERMINED = babelfish.Language('und')
SYN = {('und', None): ['unknown', 'inconnu', 'unk', 'un'],
('ell', None): ['gr', 'greek'],
('spa', None): ['esp', 'español'],
('fra', None): ['français', 'vf', 'vff', 'vfi'],
('swe', None): ['se'],
('por', 'BR'): ['po', 'pb', 'pob', 'br', 'brazilian'],
('cat', None): ['català'],
('ces', None): ['cz'],
('ukr', None): ['ua'],
('zho', None): ['cn'],
('jpn', None): ['jp'],
('hrv', None): ['scr'],
('mul', None): ['multi', 'dl'], # http://scenelingo.wordpress.com/2009/03/24/what-does-dl-mean/
}
class GuessitConverter(babelfish.LanguageReverseConverter):
_with_country_regexp = re.compile('(.*)\((.*)\)')
_with_country_regexp2 = re.compile('(.*)-(.*)')
def __init__(self):
self.guessit_exceptions = {}
for (alpha3, country), synlist in SYN.items():
for syn in synlist:
self.guessit_exceptions[syn.lower()] = (alpha3, country, None)
@property
def codes(self):
return (babelfish.language_converters['alpha3b'].codes |
babelfish.language_converters['alpha2'].codes |
babelfish.language_converters['name'].codes |
babelfish.language_converters['opensubtitles'].codes |
babelfish.country_converters['name'].codes |
frozenset(self.guessit_exceptions.keys()))
def convert(self, alpha3, country=None, script=None):
return str(babelfish.Language(alpha3, country, script))
def reverse(self, name):
with_country = (GuessitConverter._with_country_regexp.match(name) or
GuessitConverter._with_country_regexp2.match(name))
name = u(name.lower())
if with_country:
lang = Language.fromguessit(with_country.group(1).strip())
lang.country = babelfish.Country.fromguessit(with_country.group(2).strip())
return (lang.alpha3, lang.country.alpha2 if lang.country else None, lang.script or None)
# exceptions come first, as they need to override a potential match
# with any of the other guessers
try:
return self.guessit_exceptions[name]
except KeyError:
pass
for conv in [babelfish.Language,
babelfish.Language.fromalpha3b,
babelfish.Language.fromalpha2,
babelfish.Language.fromname,
babelfish.Language.fromopensubtitles]:
try:
c = conv(name)
return c.alpha3, c.country, c.script
except (ValueError, babelfish.LanguageReverseError):
pass
raise babelfish.LanguageReverseError(name)
babelfish.language_converters['guessit'] = GuessitConverter()
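# Once registered, babelfish grows a Language.fromguessit constructor that
# resolves the synonyms in SYN above; a small sketch:
assert babelfish.Language.fromguessit('pb').alpha3 == 'por'
assert babelfish.Language.fromguessit('pb').country.alpha2 == 'BR'
assert babelfish.Language.fromguessit('greek').alpha3 == 'ell'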
COUNTRIES_SYN = {'ES': ['españa'],
'GB': ['UK'],
'BR': ['brazilian', 'bra'],
# FIXME: this one is a bit of a stretch, not sure how to do
# it properly, though...
'MX': ['Latinoamérica', 'latin america']
}
class GuessitCountryConverter(babelfish.CountryReverseConverter):
def __init__(self):
self.guessit_exceptions = {}
for alpha2, synlist in COUNTRIES_SYN.items():
for syn in synlist:
self.guessit_exceptions[syn.lower()] = alpha2
@property
def codes(self):
return (babelfish.country_converters['name'].codes |
frozenset(babelfish.COUNTRIES.values()) |
frozenset(self.guessit_exceptions.keys()))
def convert(self, alpha2):
if alpha2 == 'GB':
return 'UK'
return str(Country(alpha2))
def reverse(self, name):
# exceptions come first, as they need to override a potential match
# with any of the other guessers
try:
return self.guessit_exceptions[name.lower()]
except KeyError:
pass
try:
return babelfish.Country(name.upper()).alpha2
except ValueError:
pass
for conv in [babelfish.Country.fromname]:
try:
return conv(name).alpha2
except babelfish.CountryReverseError:
pass
raise babelfish.CountryReverseError(name)
babelfish.country_converters['guessit'] = GuessitCountryConverter()
# list of common words which could be interpreted as languages, but which
# are far too common to be able to say they represent a language in the
# middle of a string (where they most likely carry their common meaning)
LNG_COMMON_WORDS = frozenset([
# english words
'is', 'it', 'am', 'mad', 'men', 'man', 'run', 'sin', 'st', 'to',
'no', 'non', 'war', 'min', 'new', 'car', 'day', 'bad', 'bat', 'fan',
'fry', 'cop', 'zen', 'gay', 'fat', 'one', 'cherokee', 'got', 'an', 'as',
'cat', 'her', 'be', 'hat', 'sun', 'may', 'my', 'mr', 'rum', 'pi', 'bb', 'bt',
'tv', 'aw', 'by', 'md', 'mp', 'cd', 'lt', 'gt', 'in', 'ad', 'ice', 'ay',
# french words
'bas', 'de', 'le', 'son', 'ne', 'ca', 'ce', 'et', 'que',
'mal', 'est', 'vol', 'or', 'mon', 'se', 'je', 'tu', 'me',
'ne', 'ma', 'va', 'au',
# japanese words,
'wa', 'ga', 'ao',
# spanish words
'la', 'el', 'del', 'por', 'mar',
# other
'ind', 'arw', 'ts', 'ii', 'bin', 'chan', 'ss', 'san', 'oss', 'iii',
'vi', 'ben', 'da', 'lt', 'ch',
# new from babelfish
'mkv', 'avi', 'dmd', 'the', 'dis', 'cut', 'stv', 'des', 'dia', 'and',
'cab', 'sub', 'mia', 'rim', 'las', 'une', 'par', 'srt', 'ano', 'toy',
'job', 'gag', 'reel', 'www', 'for', 'ayu', 'csi', 'ren', 'moi', 'sur',
'fer', 'fun', 'two', 'big', 'psy', 'air',
# movie title
'brazil',
# release groups
'bs', # Bosnian
'kz',
# countries
'gt', 'lt',
# part/pt
'pt'
])
LNG_COMMON_WORDS_STRICT = frozenset(['brazil'])
subtitle_prefixes = ['sub', 'subs', 'st', 'vost', 'subforced', 'fansub', 'hardsub']
subtitle_suffixes = ['subforced', 'fansub', 'hardsub']
lang_prefixes = ['true']
def find_possible_languages(string, allowed_languages=None):
"""Find possible languages in the string
:return: list of tuple (property, Language, lang_word, word)
"""
common_words = None
if allowed_languages:
common_words = LNG_COMMON_WORDS_STRICT
else:
common_words = LNG_COMMON_WORDS
words = find_words(string)
valid_words = []
for word in words:
lang_word = word.lower()
key = 'language'
for prefix in subtitle_prefixes:
if lang_word.startswith(prefix):
lang_word = lang_word[len(prefix):]
key = 'subtitleLanguage'
for suffix in subtitle_suffixes:
if lang_word.endswith(suffix):
                lang_word = lang_word[:-len(suffix)]  # strip the suffix
key = 'subtitleLanguage'
for prefix in lang_prefixes:
if lang_word.startswith(prefix):
lang_word = lang_word[len(prefix):]
if lang_word not in common_words:
try:
lang = Language.fromguessit(lang_word)
if allowed_languages:
if lang.name.lower() in allowed_languages or lang.alpha2.lower() in allowed_languages or lang.alpha3.lower() in allowed_languages:
valid_words.append((key, lang, lang_word, word))
# Keep language with alpha2 equivalent. Others are probably
# uncommon languages.
elif lang == 'mul' or hasattr(lang, 'alpha2'):
valid_words.append((key, lang, lang_word, word))
except babelfish.Error:
pass
return valid_words
def search_language(string, allowed_languages=None):
"""Looks for language patterns, and if found return the language object,
its group span and an associated confidence.
    You can specify a list of allowed languages using the allowed_languages
    argument, as in allowed_languages = [ 'fr', 'eng', 'spanish' ]
>>> search_language('movie [en].avi')['language']
<Language [en]>
>>> search_language('the zen fat cat and the gay mad men got a new fan', allowed_languages = ['en', 'fr', 'es'])
"""
if allowed_languages:
allowed_languages = set(Language.fromguessit(lang) for lang in allowed_languages)
confidence = 1.0 # for all of them
for prop, language, lang, word in find_possible_languages(string, allowed_languages):
pos = string.find(word)
end = pos + len(word)
# only allow those languages that have a 2-letter code, those that
# don't are too esoteric and probably false matches
# if language.lang not in lng3_to_lng2:
# continue
# confidence depends on alpha2, alpha3, english name, ...
if len(lang) == 2:
confidence = 0.8
elif len(lang) == 3:
confidence = 0.9
elif prop == 'subtitleLanguage':
confidence = 0.6 # Subtitle prefix found with language
else:
# Note: we could either be really confident that we found a
# language or assume that full language names are too
# common words and lower their confidence accordingly
confidence = 0.3 # going with the low-confidence route here
return Guess({prop: language}, confidence=confidence, input=string, span=(pos, end))
return None
def guess_language(text): # pragma: no cover
"""Guess the language in which a body of text is written.
This uses the external guess-language python module, and will fail and return
Language(Undetermined) if it is not installed.
"""
try:
from guess_language import guessLanguage
return Language.fromguessit(guessLanguage(text))
except ImportError:
log.error('Cannot detect the language of the given text body, missing dependency: guess-language')
log.error('Please install it from PyPI, by doing eg: pip install guess-language')
return UNDETERMINED

View file

@ -1,306 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, \
unicode_literals
import logging
from guessit import PY3, u
from guessit.transfo import TransformerException
from guessit.matchtree import MatchTree
from guessit.textutils import normalize_unicode, clean_default
from guessit.guess import Guess
import inspect
log = logging.getLogger(__name__)
class IterativeMatcher(object):
"""An iterative matcher tries to match different patterns that appear
in the filename.
The ``filetype`` argument indicates which type of file you want to match.
If it is undefined, the matcher will try to see whether it can guess
that the file corresponds to an episode, or otherwise will assume it is
a movie.
The recognized ``filetype`` values are:
``['subtitle', 'info', 'movie', 'moviesubtitle', 'movieinfo', 'episode',
'episodesubtitle', 'episodeinfo']``
``options`` is a dict of options values to be passed to the transformations used
by the matcher.
The IterativeMatcher works mainly in 2 steps:
First, it splits the filename into a match_tree, which is a tree of groups
which have a semantic meaning, such as episode number, movie title,
etc...
The match_tree created looks like the following::
0000000000000000000000000000000000000000000000000000000000000000000000000000000000 111
0000011111111111112222222222222233333333444444444444444455555555666777777778888888 000
0000000000000000000000000000000001111112011112222333333401123334000011233340000000 000
__________________(The.Prestige).______.[____.HP.______.{__-___}.St{__-___}.Chaps].___
xxxxxttttttttttttt ffffff vvvv xxxxxx ll lll xx xxx ccc
[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv
The first 3 lines indicates the group index in which a char in the
filename is located. So for instance, ``x264`` (in the middle) is the group (0, 4, 1), and
it corresponds to a video codec, denoted by the letter ``v`` in the 4th line.
(for more info, see guess.matchtree.to_string)
Second, it tries to merge all this information into a single object
containing all the found properties, and does some (basic) conflict
resolution when they arise.
"""
def __init__(self, filename, options=None, **kwargs):
options = dict(options or {})
for k, v in kwargs.items():
if k not in options or not options[k]:
options[k] = v # options dict has priority over keyword arguments
self._validate_options(options)
if not PY3 and not isinstance(filename, unicode):
log.warning('Given filename to matcher is not unicode...')
filename = filename.decode('utf-8')
filename = normalize_unicode(filename)
if options and options.get('clean_function'):
clean_function = options.get('clean_function')
if not hasattr(clean_function, '__call__'):
                module, function = clean_function.rsplit('.', 1)
if not module:
module = 'guessit.textutils'
clean_function = getattr(__import__(module), function)
if not clean_function:
log.error('Can\'t find clean function %s. Default will be used.' % options.get('clean_function'))
clean_function = clean_default
else:
clean_function = clean_default
self.match_tree = MatchTree(filename, clean_function=clean_function)
self.options = options
self._transfo_calls = []
# sanity check: make sure we don't process a (mostly) empty string
if clean_function(filename).strip() == '':
return
from guessit.plugins import transformers
try:
mtree = self.match_tree
if 'type' in self.options:
mtree.guess.set('type', self.options['type'], confidence=0.0)
# Process
for transformer in transformers.all_transformers():
disabled = options.get('disabled_transformers')
if not disabled or transformer.name not in disabled:
self._process(transformer, False)
# Post-process
for transformer in transformers.all_transformers():
disabled = options.get('disabled_transformers')
if not disabled or transformer.name not in disabled:
self._process(transformer, True)
log.debug('Found match tree:\n%s' % u(mtree))
except TransformerException as e:
log.debug('An error has occurred in Transformer %s: %s' % (e.transformer, e))
def _process(self, transformer, post=False):
if not hasattr(transformer, 'should_process') or transformer.should_process(self.match_tree, self.options):
if post:
transformer.post_process(self.match_tree, self.options)
else:
transformer.process(self.match_tree, self.options)
self._transfo_calls.append(transformer)
@property
def second_pass_options(self):
second_pass_options = {}
for transformer in self._transfo_calls:
if hasattr(transformer, 'second_pass_options'):
transformer_second_pass_options = transformer.second_pass_options(self.match_tree, self.options)
if transformer_second_pass_options:
second_pass_options.update(transformer_second_pass_options)
return second_pass_options
def _validate_options(self, options):
valid_filetypes = ('subtitle', 'info', 'video',
'movie', 'moviesubtitle', 'movieinfo',
'episode', 'episodesubtitle', 'episodeinfo')
type_ = options.get('type')
if type_ and type_ not in valid_filetypes:
raise ValueError("filetype needs to be one of %s" % (valid_filetypes,))
def matched(self):
return self.match_tree.matched()
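# Usage sketch tying the two steps together (path borrowed from the match
# tree documentation; the 'type' hint is optional):
from guessit.matcher import IterativeMatcher
m = IterativeMatcher('Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv',
                     type='movie')
print(m.match_tree)  # renders the annotated tree
guess = m.matched()  # single merged Guess (title, year, format, ...)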
def build_guess(node, name, value=None, confidence=1.0):
guess = Guess({name: node.clean_value if value is None else value}, confidence=confidence)
guess.metadata().input = node.value if value is None else value
if value is None:
left_offset = 0
right_offset = 0
clean_value = node.clean_value
for i in range(0, len(node.value)):
if clean_value[0] == node.value[i]:
break
left_offset += 1
for i in reversed(range(0, len(node.value))):
if clean_value[-1] == node.value[i]:
break
right_offset += 1
guess.metadata().span = (node.span[0] - node.offset + left_offset, node.span[1] - node.offset - right_offset)
return guess
def found_property(node, name, value=None, confidence=1.0, update_guess=True, logger=None):
# automatically retrieve the log object from the caller frame
if not logger:
caller_frame = inspect.stack()[1][0]
logger = caller_frame.f_locals['self'].log
guess = build_guess(node, name, value, confidence)
return found_guess(node, guess, update_guess=update_guess, logger=logger)
def found_guess(node, guess, update_guess=True, logger=None):
if node.guess:
if update_guess:
node.guess.update_highest_confidence(guess)
else:
child = node.add_child(guess.metadata().span)
child.guess = guess
else:
node.guess = guess
log_found_guess(guess, logger)
return node.guess
def log_found_guess(guess, logger=None):
for k, v in guess.items():
(logger or log).debug('Property found: %s=%s (%s) (confidence=%.2f)' %
(k, v, guess.raw(k), guess.confidence(k)))
def _get_split_spans(node, span):
partition_spans = node.get_partition_spans(span)
for to_remove_span in partition_spans:
if to_remove_span[0] == span[0] and to_remove_span[1] in [span[1], span[1] + 1]:
partition_spans.remove(to_remove_span)
break
return partition_spans
class GuessFinder(object):
def __init__(self, guess_func, confidence=None, logger=None, options=None):
self.guess_func = guess_func
self.confidence = confidence
self.logger = logger or log
self.options = options
def process_nodes(self, nodes):
for node in nodes:
self.process_node(node)
def process_node(self, node, iterative=True, partial_span=None):
if partial_span:
value = node.value[partial_span[0]:partial_span[1]]
else:
value = node.value
string = ' %s ' % value # add sentinels
if not self.options:
matcher_result = self.guess_func(string, node)
else:
matcher_result = self.guess_func(string, node, self.options)
if matcher_result:
if not isinstance(matcher_result, Guess):
result, span = matcher_result
else:
result, span = matcher_result, matcher_result.metadata().span
if result:
# readjust span to compensate for sentinels
span = (span[0] - 1, span[1] - 1)
# readjust span to compensate for partial_span
if partial_span:
span = (span[0] + partial_span[0], span[1] + partial_span[0])
partition_spans = None
if self.options and 'skip_nodes' in self.options:
skip_nodes = self.options.get('skip_nodes')
for skip_node in skip_nodes:
if skip_node.parent.node_idx == node.node_idx[:len(skip_node.parent.node_idx)] and\
skip_node.span == span or\
skip_node.span == (span[0] + skip_node.offset, span[1] + skip_node.offset):
if partition_spans is None:
partition_spans = _get_split_spans(node, skip_node.span)
else:
new_partition_spans = []
for partition_span in partition_spans:
tmp_node = MatchTree(value, span=partition_span, parent=node)
tmp_partitions_spans = _get_split_spans(tmp_node, skip_node.span)
new_partition_spans.extend(tmp_partitions_spans)
partition_spans.extend(new_partition_spans)
if not partition_spans:
# restore sentinels compensation
if isinstance(result, Guess):
guess = result
else:
guess = Guess(result, confidence=self.confidence, input=string, span=span)
if not iterative:
found_guess(node, guess, logger=self.logger)
else:
absolute_span = (span[0] + node.offset, span[1] + node.offset)
node.partition(span)
if node.is_leaf():
found_guess(node, guess, logger=self.logger)
else:
found_child = None
for child in node.children:
if child.span == absolute_span:
found_guess(child, guess, logger=self.logger)
found_child = child
break
for child in node.children:
if child is not found_child:
self.process_node(child)
else:
for partition_span in partition_spans:
self.process_node(node, partial_span=partition_span)

View file

@ -1,426 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
import guessit # @UnusedImport needed for doctests
from guessit import UnicodeMixin, base_text_type
from guessit.textutils import clean_default, str_fill
from guessit.patterns import group_delimiters
from guessit.guess import (merge_similar_guesses, smart_merge,
choose_int, choose_string, Guess)
from itertools import takewhile
import copy
import logging
log = logging.getLogger(__name__)
class BaseMatchTree(UnicodeMixin):
"""A BaseMatchTree is a tree covering the filename, where each
node represents a substring in the filename and can have a ``Guess``
associated with it that contains the information that has been guessed
in this node. Nodes can be further split into subnodes until a proper
split has been found.
Each node has the following attributes:
- string = the original string of which this node represents a region
- span = a pair of (begin, end) indices delimiting the substring
- parent = parent node
- children = list of children nodes
- guess = Guess()
BaseMatchTrees are displayed in the following way:
>>> path = 'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv'
>>> print(guessit.IterativeMatcher(path).match_tree)
000000 1111111111111111 2222222222222222222222222222222222222222222 333
000000 0000000000111111 0000000000111111222222222222222222222222222 000
011112 011112000011111222222222222222222 000
011112222222222222
0000011112222
01112 0111
Movies/__________(____)/Dark.City.(____).DC._____.____.___.____-___.___
tttttttttt yyyy yyyy fffff ssss aaa vvvv rrr ccc
Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv
    The last line contains the filename, which you can use as a reference.
The previous line contains the type of property that has been found.
The line before that contains the filename, where all the found groups
have been blanked. Basically, what is left on this line are the leftover
groups which could not be identified.
The lines before that indicate the indices of the groups in the tree.
For instance, the part of the filename 'BDRip' is the leaf with index
``(2, 2, 1)`` (read from top to bottom), and its meaning is 'format'
(as shown by the ``f``'s on the last-but-one line).
"""
def __init__(self, string='', span=None, parent=None, clean_function=None):
self.string = string
self.span = span or (0, len(string))
self.parent = parent
self.children = []
self.guess = Guess()
self._clean_value = None
self._clean_function = clean_function or clean_default
@property
def value(self):
"""Return the substring that this node matches."""
return self.string[self.span[0]:self.span[1]]
@property
def clean_value(self):
"""Return a cleaned value of the matched substring, with better
presentation formatting (punctuation marks removed, duplicate
spaces, ...)"""
if self._clean_value is None:
self._clean_value = self.clean_string(self.value)
return self._clean_value
def clean_string(self, string):
return self._clean_function(string)
@property
def offset(self):
return self.span[0]
@property
def info(self):
"""Return a dict containing all the info guessed by this node,
subnodes included."""
result = dict(self.guess)
for c in self.children:
result.update(c.info)
return result
@property
def root(self):
"""Return the root node of the tree."""
if not self.parent:
return self
return self.parent.root
@property
def depth(self):
"""Return the depth of this node."""
if self.is_leaf():
return 0
return 1 + max(c.depth for c in self.children)
def is_leaf(self):
"""Return whether this node is a leaf or not."""
return self.children == []
def add_child(self, span):
"""Add a new child node to this node with the given span."""
child = MatchTree(self.string, span=span, parent=self, clean_function=self._clean_function)
self.children.append(child)
return child
def get_partition_spans(self, indices):
"""Return the list of absolute spans for the regions of the original
string defined by splitting this node at the given indices (relative
to this node)"""
indices = sorted(indices)
if indices[0] != 0:
indices.insert(0, 0)
if indices[-1] != len(self.value):
indices.append(len(self.value))
spans = []
for start, end in zip(indices[:-1], indices[1:]):
spans.append((self.offset + start,
self.offset + end))
return spans
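# For instance (hypothetical six-character root node): splitting at relative
# indices [2, 4] yields contiguous absolute spans covering the whole region.
node = MatchTree('abcdef')
assert node.get_partition_spans([2, 4]) == [(0, 2), (2, 4), (4, 6)]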
def partition(self, indices):
"""Partition this node by splitting it at the given indices,
relative to this node."""
for partition_span in self.get_partition_spans(indices):
self.add_child(span=partition_span)
def split_on_components(self, components):
offset = 0
for c in components:
start = self.value.find(c, offset)
end = start + len(c)
self.add_child(span=(self.offset + start,
self.offset + end))
offset = end
def nodes_at_depth(self, depth):
"""Return all the nodes at a given depth in the tree"""
if depth == 0:
yield self
for child in self.children:
for node in child.nodes_at_depth(depth - 1):
yield node
@property
def node_idx(self):
"""Return this node's index in the tree, as a tuple.
If this node is the root of the tree, then return ()."""
if self.parent is None:
return ()
return self.parent.node_idx + (self.node_last_idx,)
@property
def node_last_idx(self):
if self.parent is None:
return None
return self.parent.children.index(self)
def node_at(self, idx):
"""Return the node at the given index in the subtree rooted at
this node."""
if not idx:
return self
try:
return self.children[idx[0]].node_at(idx[1:])
except IndexError:
raise ValueError('Non-existent node index: %s' % (idx,))
def nodes(self):
"""Return all the nodes and subnodes in this tree."""
yield self
for child in self.children:
for node in child.nodes():
yield node
def leaves(self):
"""Return a generator over all the nodes that are leaves."""
if self.is_leaf():
yield self
else:
for child in self.children:
# pylint: disable=W0212
for leaf in child.leaves():
yield leaf
def group_node(self):
return self._other_group_node(0)
def previous_group_node(self):
return self._other_group_node(-1)
def next_group_node(self):
return self._other_group_node(+1)
def _other_group_node(self, offset):
if len(self.node_idx) > 1:
group_idx = self.node_idx[:2]
if group_idx[1] + offset >= 0:
other_group_idx = (group_idx[0], group_idx[1] + offset)
try:
other_group_node = self.root.node_at(other_group_idx)
return other_group_node
except ValueError:
pass
return None
def previous_leaf(self, leaf):
"""Return previous leaf for this node"""
return self._other_leaf(leaf, -1)
def next_leaf(self, leaf):
"""Return next leaf for this node"""
return self._other_leaf(leaf, +1)
def _other_leaf(self, leaf, offset):
leaves = list(self.leaves())
index = leaves.index(leaf) + offset
        if 0 <= index < len(leaves):
return leaves[index]
return None
def previous_leaves(self, leaf):
"""Return previous leaves for this node"""
leaves = list(self.leaves())
index = leaves.index(leaf)
if index > 0 and index < len(leaves):
previous_leaves = leaves[:index]
previous_leaves.reverse()
return previous_leaves
return []
def next_leaves(self, leaf):
"""Return next leaves for this node"""
leaves = list(self.leaves())
index = leaves.index(leaf)
        if 0 <= index < len(leaves):
return leaves[index + 1:len(leaves)]
return []
def to_string(self):
"""Return a readable string representation of this tree.
The result is a multi-line string, where the lines are:
- line 1 -> N-2: each line contains the nodes at the given depth in the tree
- line N-2: original string where all the found groups have been blanked
- line N-1: type of property that has been found
        - line N: the original string, which you can use as a reference.
"""
empty_line = ' ' * len(self.string)
def to_hex(x):
if isinstance(x, int):
return str(x) if x < 10 else chr(55 + x)
return x
def meaning(result):
mmap = {'episodeNumber': 'E',
'season': 'S',
'extension': 'e',
'format': 'f',
'language': 'l',
'country': 'C',
'videoCodec': 'v',
'videoProfile': 'v',
'audioCodec': 'a',
'audioProfile': 'a',
'audioChannels': 'a',
'website': 'w',
'container': 'c',
'series': 'T',
'title': 't',
'date': 'd',
'year': 'y',
'releaseGroup': 'r',
'screenSize': 's',
'other': 'o'
}
if result is None:
return ' '
for prop, l in mmap.items():
if prop in result:
return l
return 'x'
lines = [empty_line] * (self.depth + 2) # +2: remaining, meaning
lines[-2] = self.string
for node in self.nodes():
if node == self:
continue
idx = node.node_idx
depth = len(idx) - 1
if idx:
lines[depth] = str_fill(lines[depth], node.span,
to_hex(idx[-1]))
if node.guess:
lines[-2] = str_fill(lines[-2], node.span, '_')
lines[-1] = str_fill(lines[-1], node.span, meaning(node.guess))
lines.append(self.string)
return '\n'.join(l.rstrip() for l in lines)
def __unicode__(self):
return self.to_string()
def __repr__(self):
return '<MatchTree: root=%s>' % self.value
class MatchTree(BaseMatchTree):
"""The MatchTree contains a few "utility" methods which are not necessary
for the BaseMatchTree, but add a lot of convenience for writing
higher-level rules.
"""
def unidentified_leaves(self,
valid=lambda leaf: len(leaf.clean_value) > 0):
"""Return a generator of leaves that are not empty."""
for leaf in self.leaves():
if not leaf.guess and valid(leaf):
yield leaf
def leaves_containing(self, property_name):
"""Return a generator of leaves that guessed the given property."""
if isinstance(property_name, base_text_type):
property_name = [property_name]
for leaf in self.leaves():
for prop in property_name:
if prop in leaf.guess:
yield leaf
break
def first_leaf_containing(self, property_name):
"""Return the first leaf containing the given property."""
try:
return next(self.leaves_containing(property_name))
except StopIteration:
return None
def previous_unidentified_leaves(self, node):
"""Return a generator of non-empty leaves that are before the given
node (in the string)."""
node_idx = node.node_idx
for leaf in self.unidentified_leaves():
if leaf.node_idx < node_idx:
yield leaf
def previous_leaves_containing(self, node, property_name):
"""Return a generator of leaves containing the given property that are
before the given node (in the string)."""
node_idx = node.node_idx
for leaf in self.leaves_containing(property_name):
if leaf.node_idx < node_idx:
yield leaf
def is_explicit(self):
"""Return whether the group was explicitly enclosed by
parentheses/square brackets/etc."""
return (self.value[0] + self.value[-1]) in group_delimiters
def matched(self):
"""Return a single guess that contains all the info found in the
nodes of this tree, trying to merge properties as good as possible.
"""
if not getattr(self, '_matched_result', None):
# we need to make a copy here, as the merge functions work in place and
# calling them on the match tree would modify it
parts = [copy.copy(node.guess) for node in self.nodes() if node.guess]
result = smart_merge(parts)
log.debug('Final result: ' + result.nice_string())
self._matched_result = result
for unidentified_leaves in self.unidentified_leaves():
if 'unidentified' not in self._matched_result:
self._matched_result['unidentified'] = []
self._matched_result['unidentified'].append(unidentified_leaves.clean_value)
return self._matched_result

View file

@ -1,7 +1,20 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Options
"""
from argparse import ArgumentParser
import shlex
import six
def build_opts(transformers=None):
def build_argument_parser():
"""
Builds the argument parser
:return: the argument parser
:rtype: ArgumentParser
"""
opts = ArgumentParser()
opts.add_argument(dest='filename', help='Filename or release name to guess', nargs='*')
@ -9,61 +22,67 @@ def build_opts(transformers=None):
naming_opts.add_argument('-t', '--type', dest='type', default=None,
help='The suggested file type: movie, episode. If undefined, type will be guessed.')
naming_opts.add_argument('-n', '--name-only', dest='name_only', action='store_true', default=False,
help='Parse files as name only. Disable folder parsing, extension parsing, and file content analysis.')
naming_opts.add_argument('-c', '--split-camel', dest='split_camel', action='store_true', default=False,
help='Split camel case part of filename.')
help='Parse files as name only, considering "/" and "\\" like other separators.')
naming_opts.add_argument('-Y', '--date-year-first', action='store_true', dest='date_year_first', default=None,
help='If short date is found, consider the first digits as the year.')
naming_opts.add_argument('-D', '--date-day-first', action='store_true', dest='date_day_first', default=None,
help='If short date is found, consider the second digits as the day.')
naming_opts.add_argument('-L', '--allowed-languages', action='append', dest='allowed_languages',
help='Allowed language (can be used multiple times)')
naming_opts.add_argument('-C', '--allowed-countries', action='append', dest='allowed_countries',
help='Allowed country (can be used multiple times)')
naming_opts.add_argument('-E', '--episode-prefer-number', action='store_true', dest='episode_prefer_number',
default=False,
help='Guess "serie.213.avi" as the episode 213. Without this option, '
'it will be guessed as season 2, episode 13')
naming_opts.add_argument('-T', '--expected-title', action='append', dest='expected_title',
help='Expected title to parse (can be used multiple times)')
naming_opts.add_argument('-G', '--expected-group', action='append', dest='expected_group',
help='Expected release group (can be used multiple times)')
naming_opts.add_argument('-X', '--disabled-transformer', action='append', dest='disabled_transformers',
                         help='Transformer to disable (can be used multiple times)')
input_opts = opts.add_argument_group("Input")
input_opts.add_argument('-f', '--input-file', dest='input_file', default=False,
help='Read filenames from an input text file. File should use UTF-8 charset.')
output_opts = opts.add_argument_group("Output")
output_opts.add_argument('-v', '--verbose', action='store_true', dest='verbose', default=False,
help='Display debug output')
output_opts.add_argument('-P', '--show-property', dest='show_property', default=None,
help='Display the value of a single property (title, series, videoCodec, year, type ...)'),
output_opts.add_argument('-u', '--unidentified', dest='unidentified', action='store_true', default=False,
help='Display the unidentified parts.'),
help='Display the value of a single property (title, series, video_codec, year, ...)')
output_opts.add_argument('-a', '--advanced', dest='advanced', action='store_true', default=False,
help='Display advanced information for filename guesses, as json output')
output_opts.add_argument('-j', '--json', dest='json', action='store_true', default=False,
help='Display information for filename guesses as json output')
output_opts.add_argument('-y', '--yaml', dest='yaml', action='store_true', default=False,
help='Display information for filename guesses as yaml output (like unit-test)')
output_opts.add_argument('-f', '--input-file', dest='input_file', default=False,
help='Read filenames from an input file.')
output_opts.add_argument('-d', '--demo', action='store_true', dest='demo', default=False,
help='Run a few builtin tests instead of analyzing a file')
help='Display information for filename guesses as yaml output')
information_opts = opts.add_argument_group("Information")
information_opts.add_argument('-p', '--properties', dest='properties', action='store_true', default=False,
help='Display properties that can be guessed.')
information_opts.add_argument('-V', '--values', dest='values', action='store_true', default=False,
help='Display property values that can be guessed.')
information_opts.add_argument('-s', '--transformers', dest='transformers', action='store_true', default=False,
help='Display transformers that can be used.')
information_opts.add_argument('--version', dest='version', action='store_true', default=False,
help='Display the guessit version.')
webservice_opts = opts.add_argument_group("guessit.io")
webservice_opts.add_argument('-b', '--bug', action='store_true', dest='submit_bug', default=False,
help='Submit a wrong detection to the guessit.io service')
other_opts = opts.add_argument_group("Other features")
other_opts.add_argument('-i', '--info', dest='info', default='filename',
help='The desired information type: filename, video, hash_mpc or a hash from python\'s '
'hashlib module, such as hash_md5, hash_sha1, ...; or a list of any of '
'them, comma-separated')
if transformers:
for transformer in transformers:
transformer.register_arguments(opts, naming_opts, output_opts, information_opts, webservice_opts, other_opts)
return opts, naming_opts, output_opts, information_opts, webservice_opts, other_opts
_opts, _naming_opts, _output_opts, _information_opts, _webservice_opts, _other_opts = None, None, None, None, None, None
return opts
def reload(transformers=None):
global _opts, _naming_opts, _output_opts, _information_opts, _webservice_opts, _other_opts
_opts, _naming_opts, _output_opts, _information_opts, _webservice_opts, _other_opts = build_opts(transformers)
def parse_options(options):
"""
Parse given option string
    :param options: options to parse; a command-line string, an options dict, or None
    :type options: string, dict or None
    :return: the parsed options dict
    :rtype: dict
"""
if isinstance(options, six.string_types):
args = shlex.split(options)
options = vars(argument_parser.parse_args(args))
if options is None:
options = {}
return options
def get_opts():
return _opts
argument_parser = build_argument_parser()
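# Sketch of both accepted forms: a string is shlex-split and parsed by the
# module-level parser; None yields an empty dict.
opts = parse_options('-t episode -L fr some.file.mkv')
assert opts['type'] == 'episode'
assert opts['allowed_languages'] == ['fr']
assert parse_options(None) == {}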

View file

@ -1,77 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
import re
from guessit import base_text_type
group_delimiters = ['()', '[]', '{}']
# separator character regexp
sep = r'[][,)(}:{+ /~/\._-]' # regexp art, hehe :D
_dash = '-'
_psep = '[\W_]?'
def build_or_pattern(patterns, escape=False):
"""Build a or pattern string from a list of possible patterns
"""
or_pattern = []
for pattern in patterns:
if not or_pattern:
or_pattern.append('(?:')
else:
or_pattern.append('|')
or_pattern.append('(?:%s)' % re.escape(pattern) if escape else pattern)
or_pattern.append(')')
return ''.join(or_pattern)
def compile_pattern(pattern, enhance=True):
"""Compile and enhance a pattern
:param pattern: Pattern to compile (regexp).
:type pattern: string
    :param enhance: Enhance pattern before compiling.
    :type enhance: bool
:return: The compiled pattern
:rtype: regular expression object
"""
return re.compile(enhance_pattern(pattern) if enhance else pattern, re.IGNORECASE)
def enhance_pattern(pattern):
"""Enhance pattern to match more equivalent values.
'-' are replaced by '[\W_]?', which matches more types of separators (or none)
:param pattern: Pattern to enhance (regexp).
:type pattern: string
:return: The enhanced pattern
:rtype: string
"""
return pattern.replace(_dash, _psep)
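# Quick sketch of the enhancement: the dash matches any single separator (or
# none at all), so one pattern covers several release-name spellings.
pattern = compile_pattern('Blu-ray')  # compiled as 'Blu[\W_]?ray', ignoring case
assert all(pattern.search(s) for s in ('Blu-ray', 'Blu ray', 'blu.ray', 'BluRay'))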

View file

@ -1,32 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
# Copyright (c) 2011 Ricard Marxer <ricardmp@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
subtitle_exts = ['srt', 'idx', 'sub', 'ssa', 'ass']
info_exts = ['nfo']
video_exts = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2',
'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm',
'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv',
'iso']

View file

@ -1,150 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
import re
digital_numeral = '\d{1,4}'
roman_numeral = "(?=[MCDLXVI]+)M{0,4}(?:CM|CD|D?C{0,3})(?:XC|XL|L?X{0,3})(?:IX|IV|V?I{0,3})"
english_word_numeral_list = [
'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten',
'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen', 'twenty'
]
french_word_numeral_list = [
'zéro', 'un', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dix-sept', 'dix-huit', 'dix-neuf', 'vingt'
]
french_alt_word_numeral_list = [
'zero', 'une', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dixsept', 'dixhuit', 'dixneuf', 'vingt'
]
def __build_word_numeral(*args, **kwargs):
re_ = None
for word_list in args:
for word in word_list:
if not re_:
re_ = '(?:(?=\w+)'
else:
re_ += '|'
re_ += word
re_ += ')'
return re_
word_numeral = __build_word_numeral(english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list)
numeral = '(?:' + digital_numeral + '|' + roman_numeral + '|' + word_numeral + ')'
__romanNumeralMap = (
('M', 1000),
('CM', 900),
('D', 500),
('CD', 400),
('C', 100),
('XC', 90),
('L', 50),
('XL', 40),
('X', 10),
('IX', 9),
('V', 5),
('IV', 4),
('I', 1)
)
__romanNumeralPattern = re.compile('^' + roman_numeral + '$')
def __parse_roman(value):
"""convert Roman numeral to integer"""
if not __romanNumeralPattern.search(value):
raise ValueError('Invalid Roman numeral: %s' % value)
result = 0
index = 0
for num, integer in __romanNumeralMap:
while value[index:index + len(num)] == num:
result += integer
index += len(num)
return result
def __parse_word(value):
"""Convert Word numeral to integer"""
for word_list in [english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list]:
try:
return word_list.index(value.lower())
except ValueError:
pass
raise ValueError
_clean_re = re.compile('[^\d]*(\d+)[^\d]*')
def parse_numeral(value, int_enabled=True, roman_enabled=True, word_enabled=True, clean=True):
"""Parse a numeric value into integer.
input can be an integer as a string, a roman numeral or a word
:param value: Value to parse. Can be an integer, roman numeral or word.
:type value: string
:return: Numeric value, or None if value can't be parsed
:rtype: int
"""
if int_enabled:
try:
if clean:
match = _clean_re.match(value)
if match:
clean_value = match.group(1)
return int(clean_value)
return int(value)
except ValueError:
pass
if roman_enabled:
try:
if clean:
for word in value.split():
try:
return __parse_roman(word.upper())
except ValueError:
pass
return __parse_roman(value)
except ValueError:
pass
if word_enabled:
try:
if clean:
for word in value.split():
try:
return __parse_word(word)
except ValueError:
pass
return __parse_word(value)
except ValueError:
pass
raise ValueError('Invalid numeral: ' + value)
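# The three recognizers in action (values taken from the lists above):
assert parse_numeral('13') == 13          # digital, extracted by _clean_re
assert parse_numeral('XIV') == 14         # roman numeral
assert parse_numeral('dix-sept') == 17    # French word numeral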

View file

@ -1,21 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals

View file

@ -1,219 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.options import reload as reload_options
from stevedore import ExtensionManager
from pkg_resources import EntryPoint
from stevedore.extension import Extension
from logging import getLogger
log = getLogger(__name__)
class Transformer(object): # pragma: no cover
def __init__(self, priority=0):
self.priority = priority
self.log = getLogger(self.name)
@property
def name(self):
return self.__class__.__name__
def supported_properties(self):
return {}
def second_pass_options(self, mtree, options=None):
return None
def should_process(self, mtree, options=None):
return True
def process(self, mtree, options=None):
pass
def post_process(self, mtree, options=None):
pass
def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options):
pass
def rate_quality(self, guess, *props):
return 0
class CustomTransformerExtensionManager(ExtensionManager):
def __init__(self, namespace='guessit.transformer', invoke_on_load=True,
invoke_args=(), invoke_kwds={}, propagate_map_exceptions=True, on_load_failure_callback=None,
verify_requirements=False):
super(CustomTransformerExtensionManager, self).__init__(namespace=namespace,
invoke_on_load=invoke_on_load,
invoke_args=invoke_args,
invoke_kwds=invoke_kwds,
propagate_map_exceptions=propagate_map_exceptions,
on_load_failure_callback=on_load_failure_callback,
verify_requirements=verify_requirements)
def order_extensions(self, extensions):
"""Order the loaded transformers
It should follow those rules
- website before language (eg: tvu.org.ru vs russian)
- language before episodes_rexps
- properties before language (eg: he-aac vs hebrew)
- release_group before properties (eg: XviD-?? vs xvid)
"""
extensions.sort(key=lambda ext: -ext.obj.priority)
return extensions
def _load_one_plugin(self, ep, invoke_on_load, invoke_args, invoke_kwds, verify_requirements=True):
if not ep.dist:
# `require` argument of ep.load() is deprecated in newer versions of setuptools
if hasattr(ep, 'resolve'):
plugin = ep.resolve()
elif hasattr(ep, '_load'):
plugin = ep._load()
else:
plugin = ep.load(require=False)
else:
plugin = ep.load()
if invoke_on_load:
obj = plugin(*invoke_args, **invoke_kwds)
else:
obj = None
return Extension(ep.name, ep, plugin, obj)
def _load_plugins(self, invoke_on_load, invoke_args, invoke_kwds, verify_requirements):
return self.order_extensions(super(CustomTransformerExtensionManager, self)._load_plugins(invoke_on_load, invoke_args, invoke_kwds, verify_requirements))
def objects(self):
return self.map(self._get_obj)
def _get_obj(self, ext):
return ext.obj
def object(self, name):
try:
return self[name].obj
except KeyError:
return None
def register_module(self, name=None, module_name=None, attrs=(), entry_point=None):
if entry_point:
ep = EntryPoint.parse(entry_point)
else:
ep = EntryPoint(name, module_name, attrs)
loaded = self._load_one_plugin(ep, invoke_on_load=True, invoke_args=(), invoke_kwds={})
if loaded:
self.extensions.append(loaded)
self.extensions = self.order_extensions(self.extensions)
self._extensions_by_name = None
class DefaultTransformerExtensionManager(CustomTransformerExtensionManager):
@property
def _internal_entry_points(self):
return ['split_path_components = guessit.transfo.split_path_components:SplitPathComponents',
'guess_filetype = guessit.transfo.guess_filetype:GuessFiletype',
'split_explicit_groups = guessit.transfo.split_explicit_groups:SplitExplicitGroups',
'guess_date = guessit.transfo.guess_date:GuessDate',
'guess_website = guessit.transfo.guess_website:GuessWebsite',
'guess_release_group = guessit.transfo.guess_release_group:GuessReleaseGroup',
'guess_properties = guessit.transfo.guess_properties:GuessProperties',
'guess_language = guessit.transfo.guess_language:GuessLanguage',
'guess_video_rexps = guessit.transfo.guess_video_rexps:GuessVideoRexps',
'guess_episodes_rexps = guessit.transfo.guess_episodes_rexps:GuessEpisodesRexps',
'guess_weak_episodes_rexps = guessit.transfo.guess_weak_episodes_rexps:GuessWeakEpisodesRexps',
'guess_bonus_features = guessit.transfo.guess_bonus_features:GuessBonusFeatures',
'guess_year = guessit.transfo.guess_year:GuessYear',
'guess_country = guessit.transfo.guess_country:GuessCountry',
'guess_idnumber = guessit.transfo.guess_idnumber:GuessIdnumber',
'split_on_dash = guessit.transfo.split_on_dash:SplitOnDash',
'guess_episode_info_from_position = guessit.transfo.guess_episode_info_from_position:GuessEpisodeInfoFromPosition',
'guess_movie_title_from_position = guessit.transfo.guess_movie_title_from_position:GuessMovieTitleFromPosition',
'guess_episode_details = guessit.transfo.guess_episode_details:GuessEpisodeDetails',
'expected_series = guessit.transfo.expected_series:ExpectedSeries',
'expected_title = guessit.transfo.expected_title:ExpectedTitle',]
def _find_entry_points(self, namespace):
entry_points = {}
# Internal entry points
if namespace == self.namespace:
for internal_entry_point_str in self._internal_entry_points:
internal_entry_point = EntryPoint.parse(internal_entry_point_str)
entry_points[internal_entry_point.name] = internal_entry_point
# Package entry points
setuptools_entrypoints = super(DefaultTransformerExtensionManager, self)._find_entry_points(namespace)
for setuptools_entrypoint in setuptools_entrypoints:
entry_points[setuptools_entrypoint.name] = setuptools_entrypoint
return list(entry_points.values())
_extensions = None
def all_transformers():
return _extensions.objects()
def get_transformer(name):
return _extensions.object(name)
def add_transformer(name, module_name, class_name):
"""
Add a transformer
:param name: the name of the transformer. ie: 'guess_regexp_id'
    :param module_name: the module name. ie: 'flexget.utils.parsers.transformers.guess_regexp_id'
:param class_name: the class name. ie: 'GuessRegexpId'
"""
_extensions.register_module(name, module_name, (class_name,))
def add_transformer(entry_point):
"""
Add a transformer
:param entry_point: entry point spec format. ie: 'guess_regexp_id = flexget.utils.parsers.transformers.guess_regexp_id:GuessRegexpId'
"""
    _extensions.register_module(entry_point=entry_point)
def reload(custom=False):
"""
Reload extension manager with default or custom one.
:param custom: if True, custom manager will be used, else default one.
Default manager will load default extensions from guessit and setuptools packaging extensions
Custom manager will not load default extensions from guessit, using only setuptools packaging extensions.
:type custom: boolean
"""
global _extensions
if custom:
_extensions = CustomTransformerExtensionManager()
else:
_extensions = DefaultTransformerExtensionManager()
reload_options(all_transformers())
reload()
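# After the module-level reload() above, the default pipeline is loaded and
# can be inspected; a small sketch:
for transformer in all_transformers():
    print(transformer.name, transformer.priority)  # sorted by descending priority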

View file

@ -1,65 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.plugins.transformers import all_transformers
def best_quality_properties(props, *guesses):
"""Retrieve the best quality guess, based on given properties
:param props: Properties to include in the rating
:type props: list of strings
:param guesses: Guesses to rate
:type guesses: :class:`guessit.guess.Guess`
:return: Best quality guess from all passed guesses
:rtype: :class:`guessit.guess.Guess`
"""
best_guess = None
best_rate = None
for guess in guesses:
for transformer in all_transformers():
rate = transformer.rate_quality(guess, *props)
if best_rate is None or best_rate < rate:
best_rate = rate
best_guess = guess
return best_guess
def best_quality(*guesses):
"""Retrieve the best quality guess.
:param guesses: Guesses to rate
:type guesses: :class:`guessit.guess.Guess`
:return: Best quality guess from all passed guesses
:rtype: :class:`guessit.guess.Guess`
"""
best_guess = None
best_rate = None
for guess in guesses:
for transformer in all_transformers():
rate = transformer.rate_quality(guess)
if best_rate is None or best_rate < rate:
best_rate = rate
best_guess = guess
return best_guess

35
libs/guessit/reutils.py Normal file
View file

@ -0,0 +1,35 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Utils for re module
"""
from rebulk.remodule import re
def build_or_pattern(patterns, name=None, escape=False):
"""
Build an alternation ("or") pattern string from a list of possible patterns
:param patterns: patterns to combine
:type patterns: iterable[str]
:param name: name of the capturing group to create, or None for a non-capturing group
:type name: str
:param escape: if True, escape each pattern with re.escape
:type escape: bool
:return: the built pattern string
:rtype: str
"""
or_pattern = []
for pattern in patterns:
if not or_pattern:
or_pattern.append('(?')
if name:
or_pattern.append('P<' + name + '>')
else:
or_pattern.append(':')
else:
or_pattern.append('|')
or_pattern.append('(?:%s)' % re.escape(pattern) if escape else pattern)
or_pattern.append(')')
return ''.join(or_pattern)
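For reference, two examples of what build_or_pattern produces, traced from the code above:
>>> build_or_pattern(['season', 'saison'])
'(?:season|saison)'
>>> build_or_pattern(['season', 'saison'], name='seasonWord')
'(?P<seasonWord>season|saison)'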

View file

@ -0,0 +1,88 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Rebulk object default builder
"""
from rebulk import Rebulk
from .markers.path import path
from .markers.groups import groups
from .properties.episodes import episodes
from .properties.container import container
from .properties.format import format_
from .properties.video_codec import video_codec
from .properties.audio_codec import audio_codec
from .properties.screen_size import screen_size
from .properties.website import website
from .properties.date import date
from .properties.title import title
from .properties.episode_title import episode_title
from .properties.language import language
from .properties.country import country
from .properties.release_group import release_group
from .properties.other import other
from .properties.edition import edition
from .properties.cds import cds
from .properties.bonus import bonus
from .properties.film import film
from .properties.part import part
from .properties.crc import crc
from .properties.mimetype import mimetype
from .properties.type import type_
from .processors import processors
def rebulk_builder():
"""
Default builder for main Rebulk object used by api.
:return: Main Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk()
rebulk.rebulk(path())
rebulk.rebulk(groups())
rebulk.rebulk(episodes())
rebulk.rebulk(container())
rebulk.rebulk(format_())
rebulk.rebulk(video_codec())
rebulk.rebulk(audio_codec())
rebulk.rebulk(screen_size())
rebulk.rebulk(website())
rebulk.rebulk(date())
rebulk.rebulk(title())
rebulk.rebulk(episode_title())
rebulk.rebulk(language())
rebulk.rebulk(country())
rebulk.rebulk(release_group())
rebulk.rebulk(other())
rebulk.rebulk(edition())
rebulk.rebulk(cds())
rebulk.rebulk(bonus())
rebulk.rebulk(film())
rebulk.rebulk(part())
rebulk.rebulk(crc())
rebulk.rebulk(processors())
rebulk.rebulk(mimetype())
rebulk.rebulk(type_())
def customize_properties(properties):
"""
Customize default rebulk properties
"""
count = properties['count']
del properties['count']
properties['season_count'] = count
properties['episode_count'] = count
return properties
rebulk.customize_properties = customize_properties
return rebulk
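A minimal usage sketch, assuming this builder is exposed as guessit.rules.rebulk_builder as in the guessit 2.x layout, and that Rebulk.matches() returns the rebulk Matches for an input string:
from guessit.rules import rebulk_builder
rebulk = rebulk_builder()
matches = rebulk.matches('Show.Name.S01E02.720p.HDTV.x264-GRP.mkv')
for match in matches:
    print(match.name, match.value)  # e.g. title, season, episode, screen_size, ...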

View file

@ -0,0 +1,14 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Common module
"""
import re
seps = r' [](){}+*|=-_~#/\\.,;:' # list of tags/words separators
seps_no_fs = seps.replace('/', '').replace('\\', '')
title_seps = r'-+/\|' # separators for title
dash = (r'-', r'['+re.escape(seps_no_fs)+']') # abbreviation used by many rebulk objects.
alt_dash = (r'@', r'['+re.escape(seps_no_fs)+']') # abbreviation used by many rebulk objects.
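dash and alt_dash are rebulk "abbreviations": as far as I can tell, rebulk expands them with a plain str.replace on the pattern string before compiling, so a pattern written with '-' matches any non-filesystem separator. A rough illustration of that assumption, with dash from this module in scope:
import re
pattern = r'True-?HD'              # pattern as written in the rules modules below
expanded = pattern.replace(*dash)  # roughly what abbreviations=[dash] does
assert re.match(expanded, 'True.HD', re.IGNORECASE)
assert re.match(expanded, 'TrueHD', re.IGNORECASE)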

View file

@ -0,0 +1,68 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Comparators
"""
try:
from functools import cmp_to_key
except ImportError:
from ...backports import cmp_to_key
def marker_comparator_predicate(match):
"""
Match predicate used in comparator
"""
return not match.private and \
match.name not in ['proper_count', 'title', 'episode_title', 'alternative_title'] and \
not (match.name == 'container' and 'extension' in match.tags)
def marker_weight(matches, marker):
"""
Compute the comparator weight of a marker: the number of distinct match names it contains.
:param matches: matches found in the input string
:param marker: marker to weigh
:return: weight of the marker
"""
return len(set(match.name for match in matches.range(*marker.span, predicate=marker_comparator_predicate)))
def marker_comparator(matches, markers):
"""
Builds a comparator that sorts markers from most valuable to least valuable.
Prefer the part with the most distinct matches, then the longer part, then the leftmost part.
:param matches:
:type matches:
:return:
:rtype:
"""
def comparator(marker1, marker2):
"""
The actual comparator function.
"""
matches_count = marker_weight(matches, marker2) - marker_weight(matches, marker1)
if matches_count:
return matches_count
len_diff = len(marker2) - len(marker1)
if len_diff:
return len_diff
return markers.index(marker2) - markers.index(marker1)
return comparator
def marker_sorted(markers, matches):
"""
Sort markers from matches, from most valuable to least valuable.
:param markers:
:type markers:
:param matches:
:type matches:
:return:
:rtype:
"""
return sorted(markers, key=cmp_to_key(marker_comparator(matches, markers)))

View file

@ -0,0 +1,125 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Date
"""
from dateutil import parser
from rebulk.remodule import re
_dsep = r'[-/ \.]'
_dsep_bis = r'[-/ \.x]'
date_regexps = [
re.compile(r'%s((\d{8}))%s' % (_dsep, _dsep), re.IGNORECASE),
re.compile(r'%s((\d{6}))%s' % (_dsep, _dsep), re.IGNORECASE),
re.compile(r'(?:^|[^\d])((\d{2})%s(\d{1,2})%s(\d{1,2}))(?:$|[^\d])' % (_dsep, _dsep), re.IGNORECASE),
re.compile(r'(?:^|[^\d])((\d{1,2})%s(\d{1,2})%s(\d{2}))(?:$|[^\d])' % (_dsep, _dsep), re.IGNORECASE),
re.compile(r'(?:^|[^\d])((\d{4})%s(\d{1,2})%s(\d{1,2}))(?:$|[^\d])' % (_dsep_bis, _dsep), re.IGNORECASE),
re.compile(r'(?:^|[^\d])((\d{1,2})%s(\d{1,2})%s(\d{4}))(?:$|[^\d])' % (_dsep, _dsep_bis), re.IGNORECASE),
re.compile(r'(?:^|[^\d])((\d{1,2}(?:st|nd|rd|th)?%s(?:[a-z]{3,10})%s\d{4}))(?:$|[^\d])' % (_dsep, _dsep),
re.IGNORECASE)]
def valid_year(year):
"""Check if number is a valid year"""
return 1920 <= year < 2030
def _is_int(string):
"""
Check if the input string is an integer
:param string:
:type string:
:return:
:rtype:
"""
try:
int(string)
return True
except ValueError:
return False
def _guess_day_first_parameter(groups):
"""
If day_first is not defined, use a heuristic to guess it.
This helps work around parser changes in python-dateutil 2.5.3.
:param groups: match groups found for the date
:type groups: list of match objects
:return: day_first option guessed value
:rtype: bool
"""
# If the match starts with a long year, day_first is forced to False.
if _is_int(groups[0]) and valid_year(int(groups[0][:4])):
return False
# If the match ends with a long year, day_first is forced to True.
elif _is_int(groups[-1]) and valid_year(int(groups[-1][-4:])):
return True
# If the match starts with a short year, day_first is forced to False.
elif _is_int(groups[0]) and int(groups[0][:2]) > 31:
return False
# If the match ends with a short year, day_first is forced to True.
elif _is_int(groups[-1]) and int(groups[-1][-2:]) > 31:
return True
def search_date(string, year_first=None, day_first=None):
"""Looks for date patterns, and if found return the date and group span.
Assumes there are sentinels at the beginning and end of the string that
always allow matching a non-digit delimiting the date.
The year may be given with only two digits; in that case the nearest possible
date to today is returned.
>>> search_date(' This happened on 2002-04-22. ')
(18, 28, datetime.date(2002, 4, 22))
>>> search_date(' And this on 17-06-1998. ')
(13, 23, datetime.date(1998, 6, 17))
>>> search_date(' no date in here ')
"""
start, end = None, None
match = None
groups = None
for date_re in date_regexps:
search_match = date_re.search(string)
if search_match and (match is None or search_match.end() - search_match.start() > len(match)):
start, end = search_match.start(1), search_match.end(1)
groups = search_match.groups()[1:]
match = '-'.join(groups)
if match is None:
return
if year_first and day_first is None:
day_first = False
if day_first is None:
day_first = _guess_day_first_parameter(groups)
# If day_first/year_first is still undefined, parsing is attempted with both possible values.
yearfirst_opts = [False, True]
if year_first is not None:
yearfirst_opts = [year_first]
dayfirst_opts = [True, False]
if day_first is not None:
dayfirst_opts = [day_first]
kwargs_list = ({'dayfirst': d, 'yearfirst': y} for d in dayfirst_opts for y in yearfirst_opts)
for kwargs in kwargs_list:
try:
date = parser.parse(match, **kwargs)
except (ValueError, TypeError): # pragma: no cover
# see https://bugs.launchpad.net/dateutil/+bug/1247643
date = None
# check date plausibility
if date and valid_year(date.year): # pylint:disable=no-member
return start, end, date.date() # pylint:disable=no-member
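One more pair of examples showing the day_first option, traced from the regexps and heuristics above (and assuming standard python-dateutil parsing behaviour):
>>> search_date(' 01-02-2016 ', day_first=True)
(1, 11, datetime.date(2016, 2, 1))
>>> search_date(' 01-02-2016 ', day_first=False)
(1, 11, datetime.date(2016, 1, 2))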

View file

@ -0,0 +1,136 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Formatters
"""
from rebulk.formatters import formatters
from rebulk.remodule import re
from . import seps
_excluded_clean_chars = ',:;-/\\'
clean_chars = ""
for sep in seps:
if sep not in _excluded_clean_chars:
clean_chars += sep
def _potential_before(i, input_string):
"""
Check if the character at position i can be a potential single char separator considering what's before it.
:param i:
:type i: int
:param input_string:
:type input_string: str
:return:
:rtype: bool
"""
return i - 2 >= 0 and input_string[i] == input_string[i - 2] and input_string[i - 1] not in seps
def _potential_after(i, input_string):
"""
Check if the character at position i can be a potential single char separator considering what's after it.
:param i:
:type i: int
:param input_string:
:type input_string: str
:return:
:rtype: bool
"""
return i + 2 >= len(input_string) or \
input_string[i + 2] == input_string[i] and input_string[i + 1] not in seps
def cleanup(input_string):
"""
Remove and strip separators from input_string, keeping the characters in ',:;-/\\'.
Separators between single characters are also kept (e.g. Marvel's Agents of S.H.I.E.L.D.)
:param input_string:
:type input_string: str
:return:
:rtype:
"""
clean_string = input_string
for char in clean_chars:
clean_string = clean_string.replace(char, ' ')
# Restore input separators if they separate single characters.
# Useful for Marvel's Agents of S.H.I.E.L.D.
# https://github.com/guessit-io/guessit/issues/278
indices = [i for i, letter in enumerate(clean_string) if letter in seps]
dots = set()
if indices:
clean_list = list(clean_string)
potential_indices = []
for i in indices:
if _potential_before(i, input_string) and _potential_after(i, input_string):
potential_indices.append(i)
replace_indices = []
for potential_index in potential_indices:
if potential_index - 2 in potential_indices or potential_index + 2 in potential_indices:
replace_indices.append(potential_index)
if replace_indices:
for replace_index in replace_indices:
dots.add(input_string[replace_index])
clean_list[replace_index] = input_string[replace_index]
clean_string = ''.join(clean_list)
clean_string = strip(clean_string, ''.join([c for c in seps if c not in dots]))
clean_string = re.sub(' +', ' ', clean_string)
return clean_string
def strip(input_string, chars=seps):
"""
Strip separators from input_string
:param input_string:
:param chars:
:type input_string:
:return:
:rtype:
"""
return input_string.strip(chars)
def raw_cleanup(raw):
"""
Cleanup a raw value to perform raw comparison
:param raw:
:type raw:
:return:
:rtype:
"""
return formatters(cleanup, strip)(raw.lower())
def reorder_title(title, articles=('the',), separators=(',', ', ')):
"""
Reorder the title
:param title:
:type title:
:param articles:
:type articles:
:param separators:
:type separators:
:return:
:rtype:
"""
ltitle = title.lower()
for article in articles:
for separator in separators:
suffix = separator + article
if ltitle[-len(suffix):] == suffix:
return title[-len(suffix) + len(separator):] + ' ' + title[:-len(suffix)]
return title
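A few examples of these formatters, traced from the code above (the S.H.I.E.L.D. case exercises the single-character separator restoration):
>>> cleanup('the.simpsons.s01')
'the simpsons s01'
>>> cleanup('Marvels.Agents.of.S.H.I.E.L.D')
'Marvels Agents of S.H.I.E.L.D'
>>> reorder_title('Simpsons, The')
'The Simpsons'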

View file

@ -0,0 +1,165 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Parse numerals from various formats
"""
from rebulk.remodule import re
digital_numeral = r'\d{1,4}'
roman_numeral = r'(?=[MCDLXVI]+)M{0,4}(?:CM|CD|D?C{0,3})(?:XC|XL|L?X{0,3})(?:IX|IV|V?I{0,3})'
english_word_numeral_list = [
'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten',
'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen', 'twenty'
]
french_word_numeral_list = [
'zéro', 'un', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dix-sept', 'dix-huit', 'dix-neuf', 'vingt'
]
french_alt_word_numeral_list = [
'zero', 'une', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dixsept', 'dixhuit', 'dixneuf', 'vingt'
]
def __build_word_numeral(*args):
"""
Build a word numeral regexp from word lists.
:param args: word lists to combine into the pattern
:type args: list of lists of str
:return: the built pattern string
:rtype: str
"""
re_ = None
for word_list in args:
for word in word_list:
if not re_:
re_ = r'(?:(?=\w+)'
else:
re_ += '|'
re_ += word
re_ += ')'
return re_
word_numeral = __build_word_numeral(english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list)
numeral = '(?:' + digital_numeral + '|' + roman_numeral + '|' + word_numeral + ')'
__romanNumeralMap = (
('M', 1000),
('CM', 900),
('D', 500),
('CD', 400),
('C', 100),
('XC', 90),
('L', 50),
('XL', 40),
('X', 10),
('IX', 9),
('V', 5),
('IV', 4),
('I', 1)
)
__romanNumeralPattern = re.compile('^' + roman_numeral + '$')
def __parse_roman(value):
"""
Convert a Roman numeral to an integer
:param value: Value to parse
:type value: string
:return:
:rtype:
"""
if not __romanNumeralPattern.search(value):
raise ValueError('Invalid Roman numeral: %s' % value)
result = 0
index = 0
for num, integer in __romanNumeralMap:
while value[index:index + len(num)] == num:
result += integer
index += len(num)
return result
def __parse_word(value):
"""
Convert Word numeral to integer
:param value: Value to parse
:type value: string
:return:
:rtype:
"""
for word_list in [english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list]:
try:
return word_list.index(value.lower())
except ValueError:
pass
raise ValueError # pragma: no cover
_clean_re = re.compile(r'[^\d]*(\d+)[^\d]*')
def parse_numeral(value, int_enabled=True, roman_enabled=True, word_enabled=True, clean=True):
"""
Parse a numeric value into integer.
:param value: Value to parse. Can be an integer, roman numeral or word.
:type value: string
:param int_enabled:
:type int_enabled:
:param roman_enabled:
:type roman_enabled:
:param word_enabled:
:type word_enabled:
:param clean:
:type clean:
:return: Numeric value, or None if value can't be parsed
:rtype: int
"""
# pylint: disable=too-many-branches
if int_enabled:
try:
if clean:
match = _clean_re.match(value)
if match:
clean_value = match.group(1)
return int(clean_value)
return int(value)
except ValueError:
pass
if roman_enabled:
try:
if clean:
for word in value.split():
try:
return __parse_roman(word.upper())
except ValueError:
pass
return __parse_roman(value)
except ValueError:
pass
if word_enabled:
try:
if clean:
for word in value.split():
try:
return __parse_word(word)
except ValueError: # pragma: no cover
pass
return __parse_word(value) # pragma: no cover
except ValueError: # pragma: no cover
pass
raise ValueError('Invalid numeral: ' + value) # pragma: no cover
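A few examples of parse_numeral, traced from the code above:
>>> parse_numeral('13')
13
>>> parse_numeral('VII')
7
>>> parse_numeral('twenty')
20
>>> parse_numeral('Part IV')  # clean=True tries each whitespace-separated word
4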

View file

@ -0,0 +1,51 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Validators
"""
from functools import partial
from rebulk.validators import chars_before, chars_after, chars_surround
from . import seps
seps_before = partial(chars_before, seps)
seps_after = partial(chars_after, seps)
seps_surround = partial(chars_surround, seps)
def int_coercable(string):
"""
Check if string can be coerced to int
:param string:
:type string:
:return:
:rtype:
"""
try:
int(string)
return True
except ValueError:
return False
def compose(*validators):
"""
Compose validator functions into a single validator
:param validators:
:type validators:
:return:
:rtype:
"""
def composed(string):
"""
Composed validator function
:param string:
:type string:
:return:
:rtype:
"""
for validator in validators:
if not validator(string):
return False
return True
return composed
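A tiny illustration of compose; the two-digit check is a hypothetical validator:
>>> two_digits_max = compose(int_coercable, lambda s: len(s) <= 2)
>>> two_digits_max('42')
True
>>> two_digits_max('123')
False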

View file

@ -0,0 +1,77 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Words utils
"""
from collections import namedtuple
from guessit.rules.common import seps
_Word = namedtuple('_Word', ['span', 'value'])
def iter_words(string):
"""
Iterate over all words in a string
:param string:
:type string:
:return: words with their spans
:rtype: iterable[_Word]
"""
i = 0
last_sep_index = -1
inside_word = False
for char in string:
if ord(char) < 128 and char in seps: # Make sure we don't exclude unicode characters.
if inside_word:
yield _Word(span=(last_sep_index+1, i), value=string[last_sep_index+1:i])
inside_word = False
last_sep_index = i
else:
inside_word = True
i += 1
if inside_word:
yield _Word(span=(last_sep_index+1, i), value=string[last_sep_index+1:i])
# list of common words which could be interpreted as properties, but which
# are far too common to be able to say they represent a property in the
# middle of a string (where they most likely carry their common meaning)
COMMON_WORDS = frozenset([
# english words
'is', 'it', 'am', 'mad', 'men', 'man', 'run', 'sin', 'st', 'to',
'no', 'non', 'war', 'min', 'new', 'car', 'day', 'bad', 'bat', 'fan',
'fry', 'cop', 'zen', 'gay', 'fat', 'one', 'cherokee', 'got', 'an', 'as',
'cat', 'her', 'be', 'hat', 'sun', 'may', 'my', 'mr', 'rum', 'pi', 'bb',
'bt', 'tv', 'aw', 'by', 'md', 'mp', 'cd', 'lt', 'gt', 'in', 'ad', 'ice',
'ay', 'at', 'star', 'so', 'he', 'do', 'ax', 'mx',
# french words
'bas', 'de', 'le', 'son', 'ne', 'ca', 'ce', 'et', 'que',
'mal', 'est', 'vol', 'or', 'mon', 'se', 'je', 'tu', 'me',
'ne', 'ma', 'va', 'au', 'lu',
# japanese words,
'wa', 'ga', 'ao',
# spanish words
'la', 'el', 'del', 'por', 'mar', 'al',
# other
'ind', 'arw', 'ts', 'ii', 'bin', 'chan', 'ss', 'san', 'oss', 'iii',
'vi', 'ben', 'da', 'lt', 'ch', 'sr', 'ps', 'cx', 'vo',
# new from babelfish
'mkv', 'avi', 'dmd', 'the', 'dis', 'cut', 'stv', 'des', 'dia', 'and',
'cab', 'sub', 'mia', 'rim', 'las', 'une', 'par', 'srt', 'ano', 'toy',
'job', 'gag', 'reel', 'www', 'for', 'ayu', 'csi', 'ren', 'moi', 'sur',
'fer', 'fun', 'two', 'big', 'psy', 'air',
# movie title
'brazil', 'jordan',
# release groups
'bs', # Bosnian
'kz',
# countries
'gt', 'lt', 'im',
# part/pt
'pt',
# screener
'scr',
# quality
'sd', 'hr'
])
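An example of iter_words combined with the COMMON_WORDS filter, traced from the code above:
>>> [w.value for w in iter_words('the.simpsons.s01e02')]
['the', 'simpsons', 's01e02']
>>> [w.value for w in iter_words('the.simpsons.s01e02') if w.value not in COMMON_WORDS]
['simpsons', 's01e02']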

View file

@ -0,0 +1,5 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Markers
"""

View file

@ -0,0 +1,49 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Groups markers (...), [...] and {...}
"""
from rebulk import Rebulk
def groups():
"""
Builder for rebulk object.
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk()
rebulk.defaults(name="group", marker=True)
starting = '([{'
ending = ')]}'
def mark_groups(input_string):
"""
Functional pattern to mark groups (...), [...] and {...}.
:param input_string:
:return:
"""
openings = ([], [], [])
i = 0
ret = []
for char in input_string:
start_type = starting.find(char)
if start_type > -1:
openings[start_type].append(i)
i += 1
end_type = ending.find(char)
if end_type > -1:
try:
start_index = openings[end_type].pop()
ret.append((start_index, i))
except IndexError:
pass
return ret
rebulk.functional(mark_groups)
return rebulk
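mark_groups is a closure here, so the call below is only notional, but traced from the code (note that i is incremented between the start and end checks) it returns end-exclusive spans that include the brackets:
>>> mark_groups('[ABC] Title (2016)')
[(0, 5), (12, 18)]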

View file

@ -0,0 +1,43 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Path markers
"""
from rebulk import Rebulk
from rebulk.utils import find_all
def path():
"""
Builder for rebulk object.
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk()
rebulk.defaults(name="path", marker=True)
def mark_path(input_string, context):
"""
Functional pattern to mark path elements.
:param input_string:
:return:
"""
ret = []
if context.get('name_only', False):
ret.append((0, len(input_string)))
else:
indices = list(find_all(input_string, '/'))
indices += list(find_all(input_string, '\\'))
indices += [-1, len(input_string)]
indices.sort()
for i in range(0, len(indices) - 1):
ret.append((indices[i] + 1, indices[i + 1]))
return ret
rebulk.functional(mark_path)
return rebulk
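mark_path is likewise a closure, but traced from the code it splits on both slash styles; a notional call:
>>> mark_path('Show/Season 1/episode.mkv', {})
[(0, 4), (5, 13), (14, 25)]
>>> mark_path('episode.mkv', {'name_only': True})
[(0, 11)]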

View file

@ -0,0 +1,198 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Processors
"""
from collections import defaultdict
import copy
import six
from rebulk import Rebulk, Rule, CustomRule, POST_PROCESS, PRE_PROCESS, AppendMatch, RemoveMatch
from guessit.rules.common.words import iter_words
from .common.formatters import cleanup
from .common.comparators import marker_sorted
from .common.date import valid_year
class EnlargeGroupMatches(CustomRule):
"""
Enlarge matches that are starting and/or ending group to include brackets in their span.
:param matches:
:type matches:
:return:
:rtype:
"""
priority = PRE_PROCESS
def when(self, matches, context):
starting = []
ending = []
for group in matches.markers.named('group'):
for match in matches.starting(group.start + 1):
starting.append(match)
for match in matches.ending(group.end - 1):
ending.append(match)
if starting or ending:
return starting, ending
def then(self, matches, when_response, context):
starting, ending = when_response
for match in starting:
matches.remove(match)
match.start -= 1
match.raw_start += 1
matches.append(match)
for match in ending:
matches.remove(match)
match.end += 1
match.raw_end -= 1
matches.append(match)
class EquivalentHoles(Rule):
"""
Creates equivalent matches for holes that have the same value as an existing match (case insensitive)
"""
priority = POST_PROCESS
consequence = AppendMatch
def when(self, matches, context):
new_matches = []
for filepath in marker_sorted(matches.markers.named('path'), matches):
holes = matches.holes(start=filepath.start, end=filepath.end, formatter=cleanup)
for name in matches.names:
for hole in list(holes):
for current_match in matches.named(name):
if isinstance(current_match.value, six.string_types) and \
hole.value.lower() == current_match.value.lower():
if 'equivalent-ignore' in current_match.tags:
continue
new_value = _preferred_string(hole.value, current_match.value)
if hole.value != new_value:
hole.value = new_value
if current_match.value != new_value:
current_match.value = new_value
hole.name = name
hole.tags = ['equivalent']
new_matches.append(hole)
if hole in holes:
holes.remove(hole)
return new_matches
class RemoveAmbiguous(Rule):
"""
If multiple matches are found with the same name but different values, keep the ones in the most valuable filepart.
Also keep other matches whose name and value equal those of the kept ones.
"""
priority = POST_PROCESS
consequence = RemoveMatch
def when(self, matches, context):
fileparts = marker_sorted(matches.markers.named('path'), matches)
previous_fileparts_names = set()
values = defaultdict(list)
to_remove = []
for filepart in fileparts:
filepart_matches = matches.range(filepart.start, filepart.end)
filepart_names = set()
for match in filepart_matches:
filepart_names.add(match.name)
if match.name in previous_fileparts_names:
if match.value not in values[match.name]:
to_remove.append(match)
else:
if match.value not in values[match.name]:
values[match.name].append(match.value)
previous_fileparts_names.update(filepart_names)
return to_remove
def _preferred_string(value1, value2): # pylint:disable=too-many-return-statements
"""
Retrieve the preferred title from the two values.
:param value1:
:type value1: str
:param value2:
:type value2: str
:return: The preferred title
:rtype: str
"""
if value1 == value2:
return value1
if value1.istitle() and not value2.istitle():
return value1
if not value1.isupper() and value2.isupper():
return value1
if not value1.isupper() and value1[0].isupper() and not value2[0].isupper():
return value1
if _count_title_words(value1) > _count_title_words(value2):
return value1
return value2
def _count_title_words(value):
"""
Count how many words in value are title-cased.
:param value:
:type value:
:return:
:rtype:
"""
ret = 0
for word in iter_words(value):
if word.value.istitle():
ret += 1
return ret
class SeasonYear(Rule):
"""
If a season is a valid year and no year was found, create a year match from the season.
"""
priority = POST_PROCESS
consequence = AppendMatch
def when(self, matches, context):
ret = []
if not matches.named('year'):
for season in matches.named('season'):
if valid_year(season.value):
year = copy.copy(season)
year.name = 'year'
ret.append(year)
return ret
class Processors(CustomRule):
"""
Empty rule for ordering post_processing properly.
"""
priority = POST_PROCESS
def when(self, matches, context):
pass
def then(self, matches, when_response, context): # pragma: no cover
pass
def processors():
"""
Builder for rebulk object.
:return: Created Rebulk object
:rtype: Rebulk
"""
return Rebulk().rules(EnlargeGroupMatches, EquivalentHoles, RemoveAmbiguous, SeasonYear, Processors)
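As a small illustration of the title-preference heuristic used by EquivalentHoles (a notional call, since _preferred_string is module-private):
>>> _preferred_string('The Big Bang Theory', 'the big bang theory')
'The Big Bang Theory'
>>> _preferred_string('SHOW NAME', 'Show Name')
'Show Name'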

View file

@ -0,0 +1,5 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Properties
"""

View file

@ -0,0 +1,164 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
audio_codec, audio_profile and audio_channels property
"""
from rebulk.remodule import re
from rebulk import Rebulk, Rule, RemoveMatch
from ..common import dash
from ..common.validators import seps_before, seps_after
audio_properties = ['audio_codec', 'audio_profile', 'audio_channels']
def audio_codec():
"""
Builder for rebulk object.
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
def audio_codec_priority(match1, match2):
"""
Give audio_codec matches priority over audio_profile and audio_channels in conflicts
:param match1:
:type match1:
:param match2:
:type match2:
:return:
:rtype:
"""
if match1.name == 'audio_codec' and match2.name in ['audio_profile', 'audio_channels']:
return match2
if match1.name in ['audio_profile', 'audio_channels'] and match2.name == 'audio_codec':
return match1
return '__default__'
rebulk.defaults(name="audio_codec", conflict_solver=audio_codec_priority)
rebulk.regex("MP3", "LAME", r"LAME(?:\d)+-?(?:\d)+", value="MP3")
rebulk.regex("Dolby", "DolbyDigital", "Dolby-Digital", "DD", value="DolbyDigital")
rebulk.regex("DolbyAtmos", "Dolby-Atmos", "Atmos", value="DolbyAtmos")
rebulk.regex("AAC", value="AAC")
rebulk.regex("AC3D?", value="AC3")
rebulk.regex("Flac", value="FLAC")
rebulk.regex("DTS", value="DTS")
rebulk.regex("True-?HD", value="TrueHD")
rebulk.defaults(name="audio_profile")
rebulk.string("HD", value="HD", tags="DTS")
rebulk.regex("HD-?MA", value="HDMA", tags="DTS")
rebulk.string("HE", value="HE", tags="AAC")
rebulk.string("LC", value="LC", tags="AAC")
rebulk.string("HQ", value="HQ", tags="AC3")
rebulk.defaults(name="audio_channels")
rebulk.regex(r'(7[\W_][01](?:ch)?)(?:[^\d]|$)', value='7.1', children=True)
rebulk.regex(r'(5[\W_][01](?:ch)?)(?:[^\d]|$)', value='5.1', children=True)
rebulk.regex(r'(2[\W_]0(?:ch)?)(?:[^\d]|$)', value='2.0', children=True)
rebulk.string('7ch', '8ch', value='7.1')
rebulk.string('5ch', '6ch', value='5.1')
rebulk.string('2ch', 'stereo', value='2.0')
rebulk.string('1ch', 'mono', value='1.0')
rebulk.rules(DtsRule, AacRule, Ac3Rule, AudioValidatorRule, HqConflictRule)
return rebulk
class AudioValidatorRule(Rule):
"""
Remove audio properties that are not surrounded by separators and not next to each other
"""
priority = 64
consequence = RemoveMatch
def when(self, matches, context):
ret = []
audio_list = matches.range(predicate=lambda match: match.name in audio_properties)
for audio in audio_list:
if not seps_before(audio):
valid_before = matches.range(audio.start - 1, audio.start,
lambda match: match.name in audio_properties)
if not valid_before:
ret.append(audio)
continue
if not seps_after(audio):
valid_after = matches.range(audio.end, audio.end + 1,
lambda match: match.name in audio_properties)
if not valid_after:
ret.append(audio)
continue
return ret
class AudioProfileRule(Rule):
"""
Abstract rule to validate audio profiles
"""
priority = 64
dependency = AudioValidatorRule
consequence = RemoveMatch
def __init__(self, codec):
super(AudioProfileRule, self).__init__()
self.codec = codec
def when(self, matches, context):
profile_list = matches.named('audio_profile', lambda match: self.codec in match.tags)
ret = []
for profile in profile_list:
codec = matches.previous(profile, lambda match: match.name == 'audio_codec' and match.value == self.codec)
if not codec:
codec = matches.next(profile, lambda match: match.name == 'audio_codec' and match.value == self.codec)
if not codec:
ret.append(profile)
return ret
class DtsRule(AudioProfileRule):
"""
Rule to validate DTS profile
"""
def __init__(self):
super(DtsRule, self).__init__("DTS")
class AacRule(AudioProfileRule):
"""
Rule to validate AAC profile
"""
def __init__(self):
super(AacRule, self).__init__("AAC")
class Ac3Rule(AudioProfileRule):
"""
Rule to validate AC3 profile
"""
def __init__(self):
super(Ac3Rule, self).__init__("AC3")
class HqConflictRule(Rule):
"""
Solve the conflict between an 'HQ' other match and an 'HQ' audio_profile match.
"""
dependency = [DtsRule, AacRule, Ac3Rule]
consequence = RemoveMatch
def when(self, matches, context):
hq_audio = matches.named('audio_profile', lambda match: match.value == 'HQ')
hq_audio_spans = [match.span for match in hq_audio]
hq_other = matches.named('other', lambda match: match.span in hq_audio_spans)
if hq_other:
return hq_other

View file

@ -0,0 +1,50 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
bonus property
"""
from rebulk.remodule import re
from rebulk import Rebulk, AppendMatch, Rule
from .title import TitleFromPosition
from ..common.formatters import cleanup
from ..common.validators import seps_surround
def bonus():
"""
Builder for rebulk object.
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE)
rebulk.regex(r'x(\d+)', name='bonus', private_parent=True, children=True, formatter=int,
validator={'__parent__': lambda match: seps_surround},
conflict_solver=lambda match, conflicting: match
if conflicting.name in ['video_codec', 'episode'] and 'bonus-conflict' not in conflicting.tags
else '__default__')
rebulk.rules(BonusTitleRule)
return rebulk
class BonusTitleRule(Rule):
"""
Find bonus title after bonus.
"""
dependency = TitleFromPosition
consequence = AppendMatch
properties = {'bonus_title': [None]}
def when(self, matches, context):
bonus_number = matches.named('bonus', lambda match: not match.private, index=0)
if bonus_number:
filepath = matches.markers.at_match(bonus_number, lambda marker: marker.name == 'path', 0)
hole = matches.holes(bonus_number.end, filepath.end + 1, formatter=cleanup, index=0)
if hole and hole.value:
hole.name = 'bonus_title'
return hole

View file

@ -0,0 +1,35 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
cd and cd_count properties
"""
from rebulk.remodule import re
from rebulk import Rebulk
from ..common import dash
def cds():
"""
Builder for rebulk object.
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
rebulk.regex(r'cd-?(?P<cd>\d+)(?:-?of-?(?P<cd_count>\d+))?',
validator={'cd': lambda match: 0 < match.value < 100,
'cd_count': lambda match: 0 < match.value < 100},
formatter={'cd': int, 'cd_count': int},
children=True,
private_parent=True,
properties={'cd': [None], 'cd_count': [None]})
rebulk.regex(r'(?P<cd_count>\d+)-?cds?',
validator={'cd': lambda match: 0 < match.value < 100,
'cd_count': lambda match: 0 < match.value < 100},
formatter={'cd_count': int},
children=True,
private_parent=True,
properties={'cd': [None], 'cd_count': [None]})
return rebulk

View file

@ -0,0 +1,53 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
container property
"""
from rebulk.remodule import re
from rebulk import Rebulk
from ..common.validators import seps_surround
from ...reutils import build_or_pattern
def container():
"""
Builder for rebulk object.
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
rebulk.defaults(name='container',
formatter=lambda value: value[1:],
tags=['extension'],
conflict_solver=lambda match, other: other
if other.name in ['format', 'video_codec'] or
other.name == 'container' and 'extension' not in other.tags
else '__default__')
subtitles = ['srt', 'idx', 'sub', 'ssa', 'ass']
info = ['nfo']
videos = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2',
'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm',
'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv',
'iso', 'vob']
torrent = ['torrent']
rebulk.regex(r'\.'+build_or_pattern(subtitles)+'$', exts=subtitles, tags=['extension', 'subtitle'])
rebulk.regex(r'\.'+build_or_pattern(info)+'$', exts=info, tags=['extension', 'info'])
rebulk.regex(r'\.'+build_or_pattern(videos)+'$', exts=videos, tags=['extension', 'video'])
rebulk.regex(r'\.'+build_or_pattern(torrent)+'$', exts=torrent, tags=['extension', 'torrent'])
rebulk.defaults(name='container',
validator=seps_surround,
formatter=lambda s: s.upper(),
conflict_solver=lambda match, other: match
if other.name in ['format',
'video_codec'] or other.name == 'container' and 'extension' in other.tags
else '__default__')
rebulk.string(*[sub for sub in subtitles if sub not in ['sub']], tags=['subtitle'])
rebulk.string(*videos, tags=['video'])
rebulk.string(*torrent, tags=['torrent'])
return rebulk

View file

@ -0,0 +1,109 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
country property
"""
# pylint: disable=no-member
import babelfish
from rebulk import Rebulk
from ..common.words import COMMON_WORDS, iter_words
def country():
"""
Builder for rebulk object.
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().defaults(name='country')
rebulk.functional(find_countries,
# Prefer language and any other property over country if not US or GB.
conflict_solver=lambda match, other: match
if other.name != 'language' or match.value not in [babelfish.Country('US'),
babelfish.Country('GB')]
else other,
properties={'country': [None]})
return rebulk
COUNTRIES_SYN = {'ES': ['españa'],
'GB': ['UK'],
'BR': ['brazilian', 'bra'],
'CA': ['québec', 'quebec', 'qc'],
# FIXME: this one is a bit of a stretch, not sure how to do it properly, though...
'MX': ['Latinoamérica', 'latin america']}
class GuessitCountryConverter(babelfish.CountryReverseConverter): # pylint: disable=missing-docstring
def __init__(self):
self.guessit_exceptions = {}
for alpha2, synlist in COUNTRIES_SYN.items():
for syn in synlist:
self.guessit_exceptions[syn.lower()] = alpha2
@property
def codes(self): # pylint: disable=missing-docstring
return (babelfish.country_converters['name'].codes |
frozenset(babelfish.COUNTRIES.values()) |
frozenset(self.guessit_exceptions.keys()))
def convert(self, alpha2):
if alpha2 == 'GB':
return 'UK'
return str(babelfish.Country(alpha2))
def reverse(self, name):
# exceptions come first, as they need to override a potential match
# with any of the other guessers
try:
return self.guessit_exceptions[name.lower()]
except KeyError:
pass
try:
return babelfish.Country(name.upper()).alpha2
except ValueError:
pass
for conv in [babelfish.Country.fromname]:
try:
return conv(name).alpha2
except babelfish.CountryReverseError:
pass
raise babelfish.CountryReverseError(name)
babelfish.country_converters['guessit'] = GuessitCountryConverter()
def is_allowed_country(country_object, context=None):
"""
Check if country is allowed.
"""
if context and context.get('allowed_countries'):
allowed_countries = context.get('allowed_countries')
return country_object.name.lower() in allowed_countries or country_object.alpha2.lower() in allowed_countries
return True
def find_countries(string, context=None):
"""
Find countries in given string.
"""
ret = []
for word_match in iter_words(string.strip().lower()):
word = word_match.value
if word.lower() in COMMON_WORDS:
continue
try:
country_object = babelfish.Country.fromguessit(word)
if is_allowed_country(country_object, context):
ret.append((word_match.span[0], word_match.span[1], {'value': country_object}))
except babelfish.Error:
continue
return ret

View file

@ -0,0 +1,85 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
crc and uuid properties
"""
from rebulk.remodule import re
from rebulk import Rebulk
from ..common.validators import seps_surround
def crc():
"""
Builder for rebulk object.
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE)
rebulk.defaults(validator=seps_surround)
rebulk.regex('(?:[a-fA-F]|[0-9]){8}', name='crc32',
conflict_solver=lambda match, other: match
if other.name in ['episode', 'season']
else '__default__')
rebulk.functional(guess_idnumber, name='uuid',
conflict_solver=lambda match, other: match
if other.name in ['episode', 'season']
else '__default__')
return rebulk
_DIGIT = 0
_LETTER = 1
_OTHER = 2
_idnum = re.compile(r'(?P<uuid>[a-zA-Z0-9-]{20,})') # 1.0, (0, 0))
def guess_idnumber(string):
"""
Guess id number function
:param string:
:type string:
:return:
:rtype:
"""
# pylint:disable=invalid-name
ret = []
matches = list(_idnum.finditer(string))
for match in matches:
result = match.groupdict()
switch_count = 0
switch_letter_count = 0
letter_count = 0
last_letter = None
last = _LETTER
for c in result['uuid']:
if c in '0123456789':
ci = _DIGIT
elif c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
ci = _LETTER
if c != last_letter:
switch_letter_count += 1
last_letter = c
letter_count += 1
else:
ci = _OTHER
if ci != last:
switch_count += 1
last = ci
# only return the result as probable if we alternate often between
# char type (more likely for hash values than for common words)
switch_ratio = float(switch_count) / len(result['uuid'])
letters_ratio = (float(switch_letter_count) / letter_count) if letter_count > 0 else 1
if switch_ratio > 0.4 and letters_ratio > 0.4:
ret.append(match.span())
return ret

View file

@ -0,0 +1,72 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
date and year properties
"""
from rebulk import Rebulk, RemoveMatch, Rule
from ..common.date import search_date, valid_year
from ..common.validators import seps_surround
def date():
"""
Builder for rebulk object.
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().defaults(validator=seps_surround)
rebulk.regex(r"\d{4}", name="year", formatter=int,
validator=lambda match: seps_surround(match) and valid_year(match.value))
def date_functional(string, context):
"""
Search for date in the string and retrieves match
:param string:
:return:
"""
ret = search_date(string, context.get('date_year_first'), context.get('date_day_first'))
if ret:
return ret[0], ret[1], {'value': ret[2]}
rebulk.functional(date_functional, name="date", properties={'date': [None]},
conflict_solver=lambda match, other: other
if other.name in ['episode', 'season']
else '__default__')
rebulk.rules(KeepMarkedYearInFilepart)
return rebulk
class KeepMarkedYearInFilepart(Rule):
"""
Keep the first year marked with [](){} in a filepart; if no year is marked, ensure years won't override the title.
"""
priority = 64
consequence = RemoveMatch
def when(self, matches, context):
ret = []
if len(matches.named('year')) > 1:
for filepart in matches.markers.named('path'):
years = matches.range(filepart.start, filepart.end, lambda match: match.name == 'year')
if len(years) > 1:
group_years = []
ungroup_years = []
for year in years:
if matches.markers.at_match(year, lambda marker: marker.name == 'group'):
group_years.append(year)
else:
ungroup_years.append(year)
if group_years and ungroup_years:
ret.extend(ungroup_years)
ret.extend(group_years[1:]) # Keep the first year in marker.
elif not group_years:
ret.append(ungroup_years[0]) # Keep first year for title.
if len(ungroup_years) > 2:
ret.extend(ungroup_years[2:])
return ret

View file

@ -0,0 +1,31 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
edition property
"""
from rebulk.remodule import re
from rebulk import Rebulk
from ..common import dash
from ..common.validators import seps_surround
def edition():
"""
Builder for rebulk object.
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
rebulk.defaults(name='edition', validator=seps_surround)
rebulk.regex('collector', 'collector-edition', 'edition-collector', value='Collector Edition')
rebulk.regex('special-edition', 'edition-special', value='Special Edition',
conflict_solver=lambda match, other: other
if other.name == 'episode_details' and other.value == 'Special'
else '__default__')
rebulk.regex('criterion-edition', 'edition-criterion', value='Criterion Edition')
rebulk.regex('deluxe', 'deluxe-edition', 'edition-deluxe', value='Deluxe Edition')
rebulk.regex('director\'?s?-cut', 'director\'?s?-cut-edition', 'edition-director\'?s?-cut', value='Director\'s cut')
return rebulk

View file

@ -0,0 +1,196 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Episode title
"""
from collections import defaultdict
from rebulk import Rebulk, Rule, AppendMatch, RenameMatch
from ..common import seps, title_seps
from ..properties.title import TitleFromPosition, TitleBaseRule
from ..common.formatters import cleanup
def episode_title():
"""
Builder for rebulk object.
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().rules(EpisodeTitleFromPosition,
AlternativeTitleReplace,
TitleToEpisodeTitle,
Filepart3EpisodeTitle,
Filepart2EpisodeTitle)
return rebulk
class TitleToEpisodeTitle(Rule):
"""
If multiple different titles are found, convert the ones following an episode number to episode_title.
"""
dependency = TitleFromPosition
def when(self, matches, context):
titles = matches.named('title')
if len(titles) < 2:
return
title_groups = defaultdict(list)
for title in titles:
title_groups[title.value].append(title)
episode_titles = []
main_titles = []
for title in titles:
if matches.previous(title, lambda match: match.name == 'episode'):
episode_titles.append(title)
else:
main_titles.append(title)
if episode_titles:
return episode_titles
def then(self, matches, when_response, context):
for title in when_response:
matches.remove(title)
title.name = 'episode_title'
matches.append(title)
class EpisodeTitleFromPosition(TitleBaseRule):
"""
Add an episode_title match to existing matches.
Must run after the TitleFromPosition rule.
"""
dependency = TitleToEpisodeTitle
def hole_filter(self, hole, matches):
episode = matches.previous(hole,
lambda previous: any(name in previous.names
for name in ['episode', 'episode_details',
'episode_count', 'season', 'season_count',
'date', 'title', 'year']),
0)
crc32 = matches.named('crc32')
return episode or crc32
def filepart_filter(self, filepart, matches):
# Filepart where title was found.
if matches.range(filepart.start, filepart.end, lambda match: match.name == 'title'):
return True
return False
def should_remove(self, match, matches, filepart, hole, context):
if match.name == 'episode_details':
return False
return super(EpisodeTitleFromPosition, self).should_remove(match, matches, filepart, hole, context)
def __init__(self):
super(EpisodeTitleFromPosition, self).__init__('episode_title', ['title'])
def when(self, matches, context):
if matches.named('episode_title'):
return
return super(EpisodeTitleFromPosition, self).when(matches, context)
class AlternativeTitleReplace(Rule):
"""
If an alternative_title was found and the title is next to an episode, season or date, rename it to episode_title.
"""
dependency = EpisodeTitleFromPosition
consequence = RenameMatch
def when(self, matches, context):
if matches.named('episode_title'):
return
alternative_title = matches.range(predicate=lambda match: match.name == 'alternative_title', index=0)
if alternative_title:
main_title = matches.chain_before(alternative_title.start, seps=seps,
predicate=lambda match: 'title' in match.tags, index=0)
if main_title:
episode = matches.previous(main_title,
lambda previous: any(name in previous.names
for name in ['episode', 'episode_details',
'episode_count', 'season',
'season_count',
'date', 'title', 'year']),
0)
crc32 = matches.named('crc32')
if episode or crc32:
return alternative_title
def then(self, matches, when_response, context):
matches.remove(when_response)
when_response.name = 'episode_title'
matches.append(when_response)
class Filepart3EpisodeTitle(Rule):
"""
If we have at least 3 fileparts structured like this:
Series name/S01/E01-episode_title.mkv
AAAAAAAAAAA/BBB/CCCCCCCCCCCCCCCCCCCCC
If CCC contains an episode and BBB contains a season number,
then the title is to be found in AAA.
"""
consequence = AppendMatch('title')
def when(self, matches, context):
fileparts = matches.markers.named('path')
if len(fileparts) < 3:
return
filename = fileparts[-1]
directory = fileparts[-2]
subdirectory = fileparts[-3]
episode_number = matches.range(filename.start, filename.end, lambda match: match.name == 'episode', 0)
if episode_number:
season = matches.range(directory.start, directory.end, lambda match: match.name == 'season', 0)
if season:
hole = matches.holes(subdirectory.start, subdirectory.end,
formatter=cleanup, seps=title_seps, predicate=lambda match: match.value,
index=0)
if hole:
return hole
class Filepart2EpisodeTitle(Rule):
"""
If we have at least 2 fileparts structured like this:
Series name S01/E01-episode_title.mkv
AAAAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBB
If BBB contains an episode and AAA contains a hole followed by a season number,
then the title is to be found in AAA.
"""
consequence = AppendMatch('title')
def when(self, matches, context):
fileparts = matches.markers.named('path')
if len(fileparts) < 2:
return
filename = fileparts[-1]
directory = fileparts[-2]
episode_number = matches.range(filename.start, filename.end, lambda match: match.name == 'episode', 0)
if episode_number:
season = matches.range(directory.start, directory.end, lambda match: match.name == 'season', 0)
if season:
hole = matches.holes(directory.start, directory.end, formatter=cleanup, seps=title_seps,
predicate=lambda match: match.value, index=0)
if hole:
return hole

View file

@ -0,0 +1,516 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
episode, season, episode_count, season_count and episode_details properties
"""
import copy
from collections import defaultdict
from rebulk import Rebulk, RemoveMatch, Rule, AppendMatch, RenameMatch
from rebulk.match import Match
from rebulk.remodule import re
from rebulk.utils import is_iterable
from .title import TitleFromPosition
from ..common import dash, alt_dash, seps
from ..common.formatters import strip
from ..common.numeral import numeral, parse_numeral
from ..common.validators import compose, seps_surround, seps_before, int_coercable
from ...reutils import build_or_pattern
def episodes():
"""
Builder for rebulk object.
:return: Created Rebulk object
:rtype: Rebulk
"""
# pylint: disable=too-many-branches,too-many-statements,too-many-locals
rebulk = Rebulk()
rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])
def season_episode_conflict_solver(match, other):
"""
Conflict solver for episode/season patterns
:param match:
:param other:
:return:
"""
if match.name in ['season', 'episode'] and other.name in ['screen_size', 'video_codec',
'audio_codec', 'audio_channels',
'container', 'date']:
return match
elif match.name in ['season', 'episode'] and other.name in ['season', 'episode'] \
and match.initiator != other.initiator:
if 'weak-episode' in match.tags:
return match
if 'weak-episode' in other.tags:
return other
if 'x' in match.initiator.raw.lower():
return match
if 'x' in other.initiator.raw.lower():
return other
return '__default__'
season_episode_seps = []
season_episode_seps.extend(seps)
season_episode_seps.extend(['x', 'X', 'e', 'E'])
season_words = ['season', 'saison', 'serie', 'seasons', 'saisons', 'series']
episode_words = ['episode', 'episodes', 'ep']
of_words = ['of', 'sur']
all_words = ['All']
season_markers = ["S"]
season_ep_markers = ["x"]
episode_markers = ["xE", "Ex", "EP", "E", "x"]
range_separators = ['-', '~', 'to', 'a']
weak_discrete_separators = list(sep for sep in seps if sep not in range_separators)
strong_discrete_separators = ['+', '&', 'and', 'et']
discrete_separators = strong_discrete_separators + weak_discrete_separators
def ordering_validator(match):
"""
Validator for season/episode lists. Numbers must be in natural order to be validated.
Episodes/seasons separated by a weak discrete separator must be consecutive, unless a strong discrete separator
or a range separator is present in the chain (1.3&5 is valid, but 1.3-5 and 1.3.5 are not).
"""
values = match.children.to_dict(implicit=True)
if 'season' in values and is_iterable(values['season']):
# Season numbers must be in natural order to be validated.
if not list(sorted(values['season'])) == values['season']:
return False
if 'episode' in values and is_iterable(values['episode']):
# Episode numbers must be in natural order to be validated.
if not list(sorted(values['episode'])) == values['episode']:
return False
def is_consecutive(property_name):
"""
Check if the property season or episode has valid consecutive values.
:param property_name:
:type property_name:
:return:
:rtype:
"""
previous_match = None
valid = True
for current_match in match.children.named(property_name):
if previous_match:
separator = match.children.previous(current_match,
lambda m: m.name == property_name + 'Separator', 0)
if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
if not current_match.value - previous_match.value == 1:
valid = False
if separator.raw in strong_discrete_separators:
valid = True
break
previous_match = current_match
return valid
return is_consecutive('episode') and is_consecutive('season')
# S01E02, 01x02, S01S02S03
rebulk.chain(formatter={'season': int, 'episode': int},
tags=['SxxExx'],
abbreviations=[alt_dash],
children=True,
private_parent=True,
validate_all=True,
validator={'__parent__': ordering_validator},
conflict_solver=season_episode_conflict_solver) \
.regex(build_or_pattern(season_markers) + r'(?P<season>\d+)@?' +
build_or_pattern(episode_markers) + r'@?(?P<episode>\d+)',
validate_all=True,
validator={'__parent__': seps_before}).repeater('+') \
.regex(build_or_pattern(episode_markers + discrete_separators + range_separators,
name='episodeSeparator',
escape=True) +
r'(?P<episode>\d+)').repeater('*') \
.chain() \
.regex(r'(?P<season>\d+)@?' +
build_or_pattern(season_ep_markers) +
r'@?(?P<episode>\d+)',
validate_all=True,
validator={'__parent__': seps_before}) \
.chain() \
.regex(r'(?P<season>\d+)@?' +
build_or_pattern(season_ep_markers) +
r'@?(?P<episode>\d+)',
validate_all=True,
validator={'__parent__': seps_before}) \
.regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
name='episodeSeparator',
escape=True) +
r'(?P<episode>\d+)').repeater('*') \
.chain() \
.regex(build_or_pattern(season_markers) + r'(?P<season>\d+)',
validate_all=True,
validator={'__parent__': seps_before}) \
.regex(build_or_pattern(season_markers + discrete_separators + range_separators,
name='seasonSeparator',
escape=True) +
r'(?P<season>\d+)').repeater('*')
# episode_details property
for episode_detail in ('Special', 'Bonus', 'Omake', 'Ova', 'Oav', 'Pilot', 'Unaired'):
rebulk.string(episode_detail, value=episode_detail, name='episode_details')
rebulk.regex(r'Extras?', name='episode_details', value='Extras')
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'],
validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True)
def validate_roman(match):
"""
Validate a roman match if surrounded by separators
:param match:
:type match:
:return:
:rtype:
"""
if int_coercable(match.raw):
return True
return seps_surround(match)
rebulk.chain(abbreviations=[alt_dash],
formatter={'season': parse_numeral, 'count': parse_numeral},
validator={'__parent__': compose(seps_surround, ordering_validator),
'season': validate_roman,
'count': validate_roman}) \
.defaults(validator=None) \
.regex(build_or_pattern(season_words) + '@?(?P<season>' + numeral + ')') \
.regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
.regex(r'@?(?P<seasonSeparator>' +
build_or_pattern(range_separators + discrete_separators + ['@'], escape=True) +
r')@?(?P<season>\d+)').repeater('*')
rebulk.regex(build_or_pattern(episode_words) + r'-?(?P<episode>\d+)' +
r'(?:v(?P<version>\d+))?' +
r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?', # Episode 4
abbreviations=[dash], formatter=int,
disabled=lambda context: context.get('type') == 'episode')
rebulk.regex(build_or_pattern(episode_words) + r'-?(?P<episode>' + numeral + ')' +
r'(?:v(?P<version>\d+))?' +
r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?', # Episode 4
abbreviations=[dash],
validator={'episode': validate_roman},
formatter={'episode': parse_numeral, 'version': int, 'count': int},
disabled=lambda context: context.get('type') != 'episode')
rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')',
tags=['SxxExx'],
abbreviations=[dash],
validator=None,
formatter={'season': int, 'other': lambda match: 'Complete'})
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
validator={'__parent__': seps_surround}, children=True, private_parent=True)
# 12, 13
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int}) \
.defaults(validator=None) \
.regex(r'(?P<episode>\d{2})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*')
# 012, 013
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int}) \
.defaults(validator=None) \
.regex(r'0(?P<episode>\d{1,2})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})').repeater('*')
# 112, 113
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int},
disabled=lambda context: not context.get('episode_prefer_number', False)) \
.defaults(validator=None) \
.regex(r'(?P<episode>\d{3,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})').repeater('*')
# 1, 2, 3
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'], formatter={'episode': int, 'version': int},
disabled=lambda context: context.get('type') != 'episode') \
.defaults(validator=None) \
.regex(r'(?P<episode>\d)') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})').repeater('*')
# e112, e113
# TODO: Enhance rebulk for validator to be used globally (season_episode_validator)
rebulk.chain(formatter={'episode': int, 'version': int}) \
.defaults(validator=None) \
.regex(r'e(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')
# ep 112, ep113, ep112, ep113
rebulk.chain(abbreviations=[dash], formatter={'episode': int, 'version': int}) \
.defaults(validator=None) \
.regex(r'ep-?(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})').repeater('*')
# 102, 0102
rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode', 'weak-duplicate'],
formatter={'season': int, 'episode': int, 'version': int},
conflict_solver=lambda match, other: match if other.name == 'year' else '__default__',
disabled=lambda context: context.get('episode_prefer_number', False)) \
.defaults(validator=None) \
.regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})').repeater('*')
rebulk.regex(r'v(?P<version>\d+)', children=True, private_parent=True, formatter=int)
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])
# TODO: List of words
# detached of X count (season/episode)
rebulk.regex(r'(?P<episode>\d+)?-?' + build_or_pattern(of_words) +
r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
abbreviations=[dash], children=True, private_parent=True, formatter=int)
rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode")
# Hardcoded movie to disable weak season/episodes
rebulk.regex('OSS-?117',
abbreviations=[dash], name="hardcoded-movies", marker=True,
conflict_solver=lambda match, other: None)
rebulk.rules(EpisodeNumberSeparatorRange(range_separators),
SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx,
RemoveWeakDuplicate, EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator,
CountValidator, EpisodeSingleDigitValidator)
return rebulk
class CountValidator(Rule):
"""
Validate count property and rename it
"""
priority = 64
consequence = [RemoveMatch, RenameMatch('episode_count'), RenameMatch('season_count')]
properties = {'episode_count': [None], 'season_count': [None]}
def when(self, matches, context):
to_remove = []
episode_count = []
season_count = []
for count in matches.named('count'):
previous = matches.previous(count, lambda match: match.name in ['episode', 'season'], 0)
if previous:
if previous.name == 'episode':
episode_count.append(count)
elif previous.name == 'season':
season_count.append(count)
else:
to_remove.append(count)
return to_remove, episode_count, season_count
class AbstractSeparatorRange(Rule):
"""
Remove separator matches and create matches for the ranges they imply.
"""
priority = 128
consequence = [RemoveMatch, AppendMatch]
def __init__(self, range_separators, property_name):
super(AbstractSeparatorRange, self).__init__()
self.range_separators = range_separators
self.property_name = property_name
def when(self, matches, context):
to_remove = []
to_append = []
for separator in matches.named(self.property_name + 'Separator'):
previous_match = matches.previous(separator, lambda match: match.name == self.property_name, 0)
next_match = matches.next(separator, lambda match: match.name == self.property_name, 0)
if previous_match and next_match and separator.value in self.range_separators:
for episode_number in range(previous_match.value + 1, next_match.value):
match = copy.copy(next_match)
match.value = episode_number
to_append.append(match)
to_remove.append(separator)
previous_match = None
for next_match in matches.named(self.property_name):
if previous_match:
separator = matches.input_string[previous_match.initiator.end:next_match.initiator.start]
if separator not in self.range_separators:
separator = strip(separator)
if separator in self.range_separators:
for episode_number in range(previous_match.value + 1, next_match.value):
match = copy.copy(next_match)
match.value = episode_number
to_append.append(match)
to_append.append(Match(previous_match.end, next_match.start - 1,
name=self.property_name + 'Separator',
private=True,
input_string=matches.input_string))
to_remove.append(next_match) # Remove and append match to support proper ordering
to_append.append(next_match)
previous_match = next_match
return to_remove, to_append
class EpisodeNumberSeparatorRange(AbstractSeparatorRange):
"""
Remove separator matches and create matches for episode range.
"""
priority = 128
consequence = [RemoveMatch, AppendMatch]
def __init__(self, range_separators):
super(EpisodeNumberSeparatorRange, self).__init__(range_separators, "episode")
class SeasonSeparatorRange(AbstractSeparatorRange):
"""
Remove separator matches and create matches for season range.
"""
priority = 128
consequence = [RemoveMatch, AppendMatch]
def __init__(self, range_separators):
super(SeasonSeparatorRange, self).__init__(range_separators, "season")
class RemoveWeakIfMovie(Rule):
"""
Remove weak-movie tagged matches if it seems to be a movie.
"""
priority = 64
consequence = RemoveMatch
def when(self, matches, context):
if matches.named('year') or matches.markers.named('hardcoded-movies'):
return matches.tagged('weak-movie')
class RemoveWeakIfSxxExx(Rule):
"""
Remove weak-movie tagged matches if SxxExx pattern is matched.
"""
priority = 64
consequence = RemoveMatch
def when(self, matches, context):
if matches.tagged('SxxExx', lambda match: not match.private):
return matches.tagged('weak-movie')
class RemoveWeakDuplicate(Rule):
"""
Remove weak-duplicate tagged matches when the same pattern appears twice, for example The 100.109
"""
priority = 64
consequence = RemoveMatch
def when(self, matches, context):
to_remove = []
for filepart in matches.markers.named('path'):
patterns = defaultdict(list)
for match in reversed(matches.range(filepart.start, filepart.end,
predicate=lambda match: 'weak-duplicate' in match.tags)):
if match.pattern in patterns[match.name]:
to_remove.append(match)
else:
patterns[match.name].append(match.pattern)
return to_remove
class EpisodeDetailValidator(Rule):
"""
Validate episode_details if they are detached or next to season or episode.
"""
priority = 64
consequence = RemoveMatch
def when(self, matches, context):
ret = []
for detail in matches.named('episode_details'):
if not seps_surround(detail) \
and not matches.previous(detail, lambda match: match.name in ['season', 'episode']) \
and not matches.next(detail, lambda match: match.name in ['season', 'episode']):
ret.append(detail)
return ret
class RemoveDetachedEpisodeNumber(Rule):
"""
If multiple episodes are found, remove those that are not detached from a range and are less than 10.
In 'Fairy Tail 2 - 16-20', the 2 should be removed.
"""
priority = 64
consequence = RemoveMatch
dependency = [RemoveWeakIfSxxExx, RemoveWeakDuplicate]
def when(self, matches, context):
ret = []
episode_numbers = []
episode_values = set()
for match in matches.named('episode', lambda match: not match.private and 'weak-movie' in match.tags):
if match.value not in episode_values:
episode_numbers.append(match)
episode_values.add(match.value)
episode_numbers = list(sorted(episode_numbers, key=lambda match: match.value))
if len(episode_numbers) > 1 and \
episode_numbers[0].value < 10 and \
episode_numbers[1].value - episode_numbers[0].value != 1:
parent = episode_numbers[0]
while parent: # TODO: Add a feature in rebulk to avoid this ...
ret.append(parent)
parent = parent.parent
return ret
class VersionValidator(Rule):
"""
Validate version if previous match is episode or if surrounded by separators.
"""
priority = 64
dependency = [RemoveWeakIfMovie, RemoveWeakIfSxxExx]
consequence = RemoveMatch
def when(self, matches, context):
ret = []
for version in matches.named('version'):
episode_number = matches.previous(version, lambda match: match.name == 'episode', 0)
if not episode_number and not seps_surround(version.initiator):
ret.append(version)
return ret
class EpisodeSingleDigitValidator(Rule):
"""
Remove a single-digit episode when it is inside a group that doesn't own the title.
"""
dependency = [TitleFromPosition]
consequence = RemoveMatch
def when(self, matches, context):
ret = []
for episode in matches.named('episode', lambda match: len(match.initiator) == 1):
group = matches.markers.at_match(episode, lambda marker: marker.name == 'group', index=0)
if group:
if not matches.range(*group.span, predicate=lambda match: match.name == 'title'):
ret.append(episode)
return ret
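# Editor's note: an illustrative sketch, not part of the vendored file. It
# assumes the packaged guessit 2.x entry point (`from guessit import guessit`);
# the file names are invented and the exact keys returned may vary by version.
if __name__ == '__main__':
    from guessit import guessit
    # 'Fairy.Tail.2.-.16-20' exercises the range separators expanded by
    # EpisodeNumberSeparatorRange and the RemoveDetachedEpisodeNumber rule.
    for name in ('Show.Name.S02E03.720p.mkv', 'Fairy.Tail.2.-.16-20.mkv'):
        print(name, dict(guessit(name, {'type': 'episode'})))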

View file

@@ -0,0 +1,42 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
film property
"""
from rebulk.remodule import re
from rebulk import Rebulk, AppendMatch, Rule
from ..common.formatters import cleanup
def film():
"""
Builder for rebulk object.
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE)
rebulk.regex(r'f(\d{1,2})', name='film', private_parent=True, children=True, formatter=int)
rebulk.rules(FilmTitleRule)
return rebulk
class FilmTitleRule(Rule):
"""
Rule to find film_title (the hole before the film property).
"""
consequence = AppendMatch
properties = {'film_title': [None]}
def when(self, matches, context):
bonus_number = matches.named('film', lambda match: not match.private, index=0)
if bonus_number:
filepath = matches.markers.at_match(bonus_number, lambda marker: marker.name == 'path', 0)
hole = matches.holes(filepath.start, bonus_number.start + 1, formatter=cleanup, index=0)
if hole and hole.value:
hole.name = 'film_title'
return hole
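# Editor's note: an illustrative sketch, not part of the vendored file,
# assuming the guessit 2.x API. The invented name below should yield a `film`
# number from the f\d{1,2} pattern and a `film_title` taken by FilmTitleRule
# from the hole before it; exact output may vary.
if __name__ == '__main__':
    from guessit import guessit
    print(dict(guessit('James_Bond-f21-Casino_Royale.mkv')))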

View file

@@ -0,0 +1,67 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
format property
"""
from rebulk.remodule import re
from rebulk import Rebulk, RemoveMatch, Rule
from ..common import dash
from ..common.validators import seps_before, seps_after
def format_():
"""
Builder for rebulk object.
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
rebulk.defaults(name="format")
rebulk.regex("VHS", "VHS-?Rip", value="VHS")
rebulk.regex("CAM", "CAM-?Rip", "HD-?CAM", value="Cam")
rebulk.regex("TELESYNC", "TS", "HD-?TS", value="Telesync")
rebulk.regex("WORKPRINT", "WP", value="Workprint")
rebulk.regex("TELECINE", "TC", value="Telecine")
rebulk.regex("PPV", "PPV-?Rip", value="PPV") # Pay Per View
rebulk.regex("SD-?TV", "SD-?TV-?Rip", "Rip-?SD-?TV", "TV-?Rip",
"Rip-?TV", value="TV") # TV is too common to allow matching
rebulk.regex("DVB-?Rip", "DVB", "PD-?TV", value="DVB")
rebulk.regex("DVD", "DVD-?Rip", "VIDEO-?TS", "DVD-?R(?:$|(?!E))", # "DVD-?R(?:$|^E)" => DVD-Real ...
"DVD-?9", "DVD-?5", value="DVD")
rebulk.regex("HD-?TV", "TV-?RIP-?HD", "HD-?TV-?RIP", "HD-?RIP", value="HDTV")
rebulk.regex("VOD", "VOD-?Rip", value="VOD")
rebulk.regex("WEB-?Rip", value="WEBRip")
rebulk.regex("WEB-?DL", "WEB-?HD", "WEB", value="WEB-DL")
rebulk.regex("HD-?DVD-?Rip", "HD-?DVD", value="HD-DVD")
rebulk.regex("Blu-?ray(?:-?Rip)?", "B[DR]", "B[DR]-?Rip", "BD[59]", "BD25", "BD50", value="BluRay")
rebulk.rules(ValidateFormat)
return rebulk
class ValidateFormat(Rule):
"""
Validate format by surrounding separators, or by an adjacent screener or video_codec property
"""
priority = 64
consequence = RemoveMatch
def when(self, matches, context):
ret = []
for format_match in matches.named('format'):
if not seps_before(format_match) and \
not matches.range(format_match.start - 1, format_match.start - 2,
lambda match: match.name == 'other' and match.value == 'Screener'):
ret.append(format_match)
continue
if not seps_after(format_match) and \
not matches.range(format_match.end, format_match.end + 1,
lambda match: match.name == 'video_codec' or (
match.name == 'other' and match.value == 'Screener')):
ret.append(format_match)
continue
return ret
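# Editor's note: an illustrative sketch, not part of the vendored file,
# assuming the guessit 2.x API; file names are invented and output may vary.
if __name__ == '__main__':
    from guessit import guessit
    for name in ('Movie.2008.DVDRip.XviD.avi', 'Movie.2008.WEB-DL.mkv'):
        print(name, guessit(name).get('format'))  # expected: DVD, then WEB-DL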

View file

@@ -0,0 +1,249 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
language and subtitle_language properties
"""
# pylint: disable=no-member
import copy
import babelfish
from rebulk.remodule import re
from rebulk import Rebulk, Rule, RemoveMatch, RenameMatch
from ..common.words import iter_words, COMMON_WORDS
from ..common.validators import seps_surround
def language():
"""
Builder for rebulk object.
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk()
rebulk.string(*subtitle_prefixes, name="subtitle_language.prefix", ignore_case=True, private=True,
validator=seps_surround)
rebulk.string(*subtitle_suffixes, name="subtitle_language.suffix", ignore_case=True, private=True,
validator=seps_surround)
rebulk.functional(find_languages, properties={'language': [None]})
rebulk.rules(SubtitlePrefixLanguageRule, SubtitleSuffixLanguageRule, SubtitleExtensionRule)
return rebulk
COMMON_WORDS_STRICT = frozenset(['brazil'])
UNDETERMINED = babelfish.Language('und')
SYN = {('und', None): ['unknown', 'inconnu', 'unk', 'un'],
('ell', None): ['gr', 'greek'],
('spa', None): ['esp', 'español'],
('fra', None): ['français', 'vf', 'vff', 'vfi', 'vfq'],
('swe', None): ['se'],
('por', 'BR'): ['po', 'pb', 'pob', 'br', 'brazilian'],
('cat', None): ['català'],
('ces', None): ['cz'],
('ukr', None): ['ua'],
('zho', None): ['cn'],
('jpn', None): ['jp'],
('hrv', None): ['scr'],
('mul', None): ['multi', 'dl']} # http://scenelingo.wordpress.com/2009/03/24/what-does-dl-mean/
class GuessitConverter(babelfish.LanguageReverseConverter): # pylint: disable=missing-docstring
_with_country_regexp = re.compile(r'(.*)\((.*)\)')
_with_country_regexp2 = re.compile(r'(.*)-(.*)')
def __init__(self):
self.guessit_exceptions = {}
for (alpha3, country), synlist in SYN.items():
for syn in synlist:
self.guessit_exceptions[syn.lower()] = (alpha3, country, None)
@property
def codes(self): # pylint: disable=missing-docstring
return (babelfish.language_converters['alpha3b'].codes |
babelfish.language_converters['alpha2'].codes |
babelfish.language_converters['name'].codes |
babelfish.language_converters['opensubtitles'].codes |
babelfish.country_converters['name'].codes |
frozenset(self.guessit_exceptions.keys()))
def convert(self, alpha3, country=None, script=None):
return str(babelfish.Language(alpha3, country, script))
def reverse(self, name):
with_country = (GuessitConverter._with_country_regexp.match(name) or
GuessitConverter._with_country_regexp2.match(name))
name = name.lower()
if with_country:
lang = babelfish.Language.fromguessit(with_country.group(1).strip())
lang.country = babelfish.Country.fromguessit(with_country.group(2).strip())
return lang.alpha3, lang.country.alpha2 if lang.country else None, lang.script or None
# exceptions come first, as they need to override a potential match
# with any of the other guessers
try:
return self.guessit_exceptions[name]
except KeyError:
pass
for conv in [babelfish.Language,
babelfish.Language.fromalpha3b,
babelfish.Language.fromalpha2,
babelfish.Language.fromname,
babelfish.Language.fromopensubtitles]:
try:
reverse = conv(name)
return reverse.alpha3, reverse.country, reverse.script
except (ValueError, babelfish.LanguageReverseError):
pass
raise babelfish.LanguageReverseError(name)
babelfish.language_converters['guessit'] = GuessitConverter()
subtitle_both = ['sub', 'subs', 'subbed', 'custom subbed', 'custom subs', 'custom sub', 'customsubbed', 'customsubs',
'customsub']
subtitle_prefixes = subtitle_both + ['st', 'vost', 'subforced', 'fansub', 'hardsub']
subtitle_suffixes = subtitle_both + ['subforced', 'fansub', 'hardsub']
lang_prefixes = ['true']
all_lang_prefixes_suffixes = subtitle_prefixes + subtitle_suffixes + lang_prefixes
def find_languages(string, context=None):
"""Find languages in the string
:return: list of tuple (property, Language, lang_word, word)
"""
allowed_languages = context.get('allowed_languages')
common_words = COMMON_WORDS_STRICT if allowed_languages else COMMON_WORDS
matches = []
for word_match in iter_words(string):
word = word_match.value
start, end = word_match.span
lang_word = word.lower()
key = 'language'
for prefix in subtitle_prefixes:
if lang_word.startswith(prefix):
lang_word = lang_word[len(prefix):]
key = 'subtitle_language'
for suffix in subtitle_suffixes:
if lang_word.endswith(suffix):
lang_word = lang_word[:len(lang_word) - len(suffix)]
key = 'subtitle_language'
for prefix in lang_prefixes:
if lang_word.startswith(prefix):
lang_word = lang_word[len(prefix):]
if lang_word not in common_words and word.lower() not in common_words:
try:
lang = babelfish.Language.fromguessit(lang_word)
match = (start, end, {'name': key, 'value': lang})
if allowed_languages:
if lang.name.lower() in allowed_languages \
or lang.alpha2.lower() in allowed_languages \
or lang.alpha3.lower() in allowed_languages:
matches.append(match)
# Keep language with alpha2 equivalent. Others are probably
# uncommon languages.
elif lang == 'mul' or hasattr(lang, 'alpha2'):
matches.append(match)
except babelfish.Error:
pass
return matches
class SubtitlePrefixLanguageRule(Rule):
"""
Convert a language guess to subtitle_language if the previous match is a subtitle language prefix
"""
consequence = RemoveMatch
properties = {'subtitle_language': [None]}
def when(self, matches, context):
to_rename = []
to_remove = matches.named('subtitle_language.prefix')
for lang in matches.named('language'):
prefix = matches.previous(lang, lambda match: match.name == 'subtitle_language.prefix', 0)
if not prefix:
group_marker = matches.markers.at_match(lang, lambda marker: marker.name == 'group', 0)
if group_marker:
# Find prefix if placed just before the group
prefix = matches.previous(group_marker, lambda match: match.name == 'subtitle_language.prefix',
0)
if not prefix:
# Find prefix if placed before in the group
prefix = matches.range(group_marker.start, lang.start,
lambda match: match.name == 'subtitle_language.prefix', 0)
if prefix:
to_rename.append((prefix, lang))
if prefix in to_remove:
to_remove.remove(prefix)
return to_rename, to_remove
def then(self, matches, when_response, context):
to_rename, to_remove = when_response
super(SubtitlePrefixLanguageRule, self).then(matches, to_remove, context)
for prefix, match in to_rename:
# Remove suffix equivalent of prefix.
suffix = copy.copy(prefix)
suffix.name = 'subtitle_language.suffix'
if suffix in matches:
matches.remove(suffix)
matches.remove(match)
match.name = 'subtitle_language'
matches.append(match)
class SubtitleSuffixLanguageRule(Rule):
"""
Convert a language guess to subtitle_language if the next match is a subtitle language suffix
"""
dependency = SubtitlePrefixLanguageRule
consequence = RemoveMatch
properties = {'subtitle_language': [None]}
def when(self, matches, context):
to_append = []
to_remove = matches.named('subtitle_language.suffix')
for lang in matches.named('language'):
suffix = matches.next(lang, lambda match: match.name == 'subtitle_language.suffix', 0)
if suffix:
to_append.append(lang)
if suffix in to_remove:
to_remove.remove(suffix)
return to_append, to_remove
def then(self, matches, when_response, context):
to_rename, to_remove = when_response
super(SubtitleSuffixLanguageRule, self).then(matches, to_remove, context)
for match in to_rename:
matches.remove(match)
match.name = 'subtitle_language'
matches.append(match)
class SubtitleExtensionRule(Rule):
"""
Convert a language guess to subtitle_language if the next match is a subtitle extension
"""
consequence = RenameMatch('subtitle_language')
properties = {'subtitle_language': [None]}
def when(self, matches, context):
subtitle_extension = matches.named('container',
lambda match: 'extension' in match.tags and 'subtitle' in match.tags,
0)
if subtitle_extension:
subtitle_lang = matches.previous(subtitle_extension, lambda match: match.name == 'language', 0)
if subtitle_lang:
return subtitle_lang
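# Editor's note: an illustrative sketch, not part of the vendored file,
# assuming the guessit 2.x API. 'VOSTFR' combines the 'vost' subtitle prefix
# with a French language code, so the guess should land in subtitle_language
# rather than language; exact keys may vary.
if __name__ == '__main__':
    from guessit import guessit
    print(dict(guessit('Movie.Title.2010.VOSTFR.XviD.mkv')))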

View file

@@ -0,0 +1,48 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
mimetype property
"""
import mimetypes
from rebulk import Rebulk, CustomRule, POST_PROCESS
from rebulk.match import Match
from ...rules.processors import Processors
def mimetype():
"""
Builder for rebulk object.
:return: Created Rebulk object
:rtype: Rebulk
"""
return Rebulk().rules(Mimetype)
class Mimetype(CustomRule):
"""
Mimetype post processor
:param matches:
:type matches:
:return:
:rtype:
"""
priority = POST_PROCESS
dependency = Processors
def when(self, matches, context):
mime, _ = mimetypes.guess_type(matches.input_string, strict=False)
return mime
def then(self, matches, when_response, context):
mime = when_response
matches.append(Match(len(matches.input_string), len(matches.input_string), name='mimetype', value=mime))
@property
def properties(self):
"""
Properties for this rule.
"""
return {'mimetype': [None]}
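# Editor's note: an illustrative sketch, not part of the vendored file. The
# rule above defers to the standard library, so the result is platform
# dependent (it relies on the local mime database knowing the extension).
if __name__ == '__main__':
    import mimetypes
    print(mimetypes.guess_type('sample.mkv', strict=False))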

View file

@@ -0,0 +1,181 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
other property
"""
import copy
from rebulk.remodule import re
from rebulk import Rebulk, Rule, RemoveMatch, POST_PROCESS, AppendMatch
from ..common import dash
from ..common import seps
from ..common.validators import seps_surround, compose
from ...rules.common.formatters import raw_cleanup
from ...reutils import build_or_pattern
def other():
"""
Builder for rebulk object.
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
rebulk.defaults(name="other", validator=seps_surround)
rebulk.regex('Audio-?Fix', 'Audio-?Fixed', value='AudioFix')
rebulk.regex('Sync-?Fix', 'Sync-?Fixed', value='SyncFix')
rebulk.regex('Dual-?Audio', value='DualAudio')
rebulk.regex('ws', 'wide-?screen', value='WideScreen')
rebulk.string('Netflix', 'NF', value='Netflix')
rebulk.string('Real', 'Fix', 'Fixed', value='Proper', tags=['has-neighbor-before', 'has-neighbor-after'])
rebulk.string('Proper', 'Repack', 'Rerip', value='Proper')
rebulk.string('Fansub', value='Fansub', tags='has-neighbor')
rebulk.string('Fastsub', value='Fastsub', tags='has-neighbor')
season_words = build_or_pattern(["seasons?", "series?"])
complete_articles = build_or_pattern(["The"])
def validate_complete(match):
"""
Make sure a season word is defined.
:param match:
:type match:
:return:
:rtype:
"""
children = match.children
if not children.named('completeWordsBefore') and not children.named('completeWordsAfter'):
return False
return True
rebulk.regex('(?P<completeArticle>' + complete_articles + '-)?' +
'(?P<completeWordsBefore>' + season_words + '-)?' +
'Complete' + '(?P<completeWordsAfter>-' + season_words + ')?',
private_names=['completeArticle', 'completeWordsBefore', 'completeWordsAfter'],
value={'other': 'Complete'},
tags=['release-group-prefix'],
validator={'__parent__': compose(seps_surround, validate_complete)})
rebulk.string('R5', 'RC', value='R5')
rebulk.regex('Pre-?Air', value='Preair')
for value in (
'Screener', 'Remux', 'Remastered', '3D', 'HD', 'mHD', 'HDLight', 'HQ', 'DDC', 'HR', 'PAL', 'SECAM', 'NTSC',
'CC', 'LD', 'MD', 'XXX'):
rebulk.string(value, value=value)
for value in ('Limited', 'Complete', 'Classic', 'Unrated', 'LiNE', 'Bonus', 'Trailer', 'FINAL', 'Retail', 'Uncut',
'Extended', 'Extended Cut'):
rebulk.string(value, value=value, tags=['has-neighbor', 'release-group-prefix'])
rebulk.string('VO', 'OV', value='OV', tags='has-neighbor')
rebulk.regex('Scr(?:eener)?', value='Screener', validator=None, tags='other.validate.screener')
rebulk.rules(ValidateHasNeighbor, ValidateHasNeighborAfter, ValidateHasNeighborBefore, ValidateScreenerRule,
ProperCountRule)
return rebulk
class ProperCountRule(Rule):
"""
Add proper_count property
"""
priority = POST_PROCESS
consequence = AppendMatch
properties = {'proper_count': [None]}
def when(self, matches, context):
propers = matches.named('other', lambda match: match.value == 'Proper')
if propers:
raws = {} # Count distinct raw values
for proper in propers:
raws[raw_cleanup(proper.raw)] = proper
proper_count_match = copy.copy(propers[-1])
proper_count_match.name = 'proper_count'
proper_count_match.value = len(raws)
return proper_count_match
class ValidateHasNeighbor(Rule):
"""
Validate tag has-neighbor: the tagged match must be adjacent to another match.
"""
consequence = RemoveMatch
def when(self, matches, context):
ret = []
for to_check in matches.range(predicate=lambda match: 'has-neighbor' in match.tags):
previous_match = matches.previous(to_check, index=0)
previous_group = matches.markers.previous(to_check, lambda marker: marker.name == 'group', 0)
if previous_group and (not previous_match or previous_group.end > previous_match.end):
previous_match = previous_group
if previous_match and not matches.input_string[previous_match.end:to_check.start].strip(seps):
break
next_match = matches.next(to_check, index=0)
next_group = matches.markers.next(to_check, lambda marker: marker.name == 'group', 0)
if next_group and (not next_match or next_group.start < next_match.start):
next_match = next_group
if next_match and not matches.input_string[to_check.end:next_match.start].strip(seps):
break
ret.append(to_check)
return ret
class ValidateHasNeighborBefore(Rule):
"""
Validate tag has-neighbor-before: the tagged match must be directly followed by another match.
"""
consequence = RemoveMatch
def when(self, matches, context):
ret = []
for to_check in matches.range(predicate=lambda match: 'has-neighbor-before' in match.tags):
next_match = matches.next(to_check, index=0)
next_group = matches.markers.next(to_check, lambda marker: marker.name == 'group', 0)
if next_group and (not next_match or next_group.start < next_match.start):
next_match = next_group
if next_match and not matches.input_string[to_check.end:next_match.start].strip(seps):
break
ret.append(to_check)
return ret
class ValidateHasNeighborAfter(Rule):
"""
Validate tag has-neighbor-after: the tagged match must directly follow another match.
"""
consequence = RemoveMatch
def when(self, matches, context):
ret = []
for to_check in matches.range(predicate=lambda match: 'has-neighbor-after' in match.tags):
previous_match = matches.previous(to_check, index=0)
previous_group = matches.markers.previous(to_check, lambda marker: marker.name == 'group', 0)
if previous_group and (not previous_match or previous_group.end > previous_match.end):
previous_match = previous_group
if previous_match and not matches.input_string[previous_match.end:to_check.start].strip(seps):
break
ret.append(to_check)
return ret
class ValidateScreenerRule(Rule):
"""
Validate tag other.validate.screener
"""
consequence = RemoveMatch
priority = 64
def when(self, matches, context):
ret = []
for screener in matches.named('other', lambda match: 'other.validate.screener' in match.tags):
format_match = matches.previous(screener, lambda match: match.name == 'format', 0)
if not format_match or matches.input_string[format_match.end:screener.start].strip(seps):
ret.append(screener)
return ret
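# Editor's note: an illustrative sketch, not part of the vendored file,
# assuming the guessit 2.x API. 'PROPER' and 'REPACK' are distinct raw values
# both mapped to other=Proper, so ProperCountRule should report a
# proper_count of 2; exact output may vary.
if __name__ == '__main__':
    from guessit import guessit
    print(dict(guessit('Show.S01E01.PROPER.REPACK.HDTV.x264.mkv')))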

View file

@@ -0,0 +1,41 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
part property
"""
from rebulk.remodule import re
from rebulk import Rebulk
from ..common import dash
from ..common.validators import seps_surround, int_coercable, compose
from ..common.numeral import numeral, parse_numeral
from ...reutils import build_or_pattern
def part():
"""
Builder for rebulk object.
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash], validator={'__parent__': seps_surround})
prefixes = ['pt', 'part']
def validate_roman(match):
"""
Validate a roman match if surrounded by separators
:param match:
:type match:
:return:
:rtype:
"""
if int_coercable(match.raw):
return True
return seps_surround(match)
rebulk.regex(build_or_pattern(prefixes) + r'-?(?P<part>' + numeral + r')',
prefixes=prefixes, validate_all=True, private_parent=True, children=True, formatter=parse_numeral,
validator={'part': compose(validate_roman, lambda m: 0 < m.value < 100)})
return rebulk
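# Editor's note: an illustrative sketch, not part of the vendored file,
# assuming the guessit 2.x API. Both arabic and roman numerals are accepted
# for the part number thanks to parse_numeral; file names are invented.
if __name__ == '__main__':
    from guessit import guessit
    for name in ('Movie.Part.2.mkv', 'Movie.Part.II.mkv'):
        print(name, guessit(name).get('part'))  # both should give part=2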

View file

@@ -0,0 +1,171 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
release_group property
"""
import copy
from rebulk.remodule import re
from rebulk import Rebulk, Rule, AppendMatch
from ..common.validators import int_coercable
from ..properties.title import TitleFromPosition
from ..common.formatters import cleanup
from ..common import seps, dash
from ..common.comparators import marker_sorted
def release_group():
"""
Builder for rebulk object.
:return: Created Rebulk object
:rtype: Rebulk
"""
return Rebulk().rules(SceneReleaseGroup, AnimeReleaseGroup, ExpectedReleaseGroup)
forbidden_groupnames = ['rip', 'by', 'for', 'par', 'pour', 'bonus']
groupname_ignore_seps = '[]{}()'
groupname_seps = ''.join([c for c in seps if c not in groupname_ignore_seps])
def clean_groupname(string):
"""
Strip separators and forbidden words from the given group name.
:param string:
:type string: str
:return:
:rtype:
"""
string = string.strip(groupname_seps)
if not (string.endswith(tuple(groupname_ignore_seps)) and string.startswith(tuple(groupname_ignore_seps)))\
and not any(i in string.strip(groupname_ignore_seps) for i in groupname_ignore_seps):
string = string.strip(groupname_ignore_seps)
for forbidden in forbidden_groupnames:
if string.lower().startswith(forbidden):
string = string[len(forbidden):]
string = string.strip(groupname_seps)
if string.lower().endswith(forbidden):
string = string[:-len(forbidden)]  # drop the forbidden suffix
string = string.strip(groupname_seps)
return string
_scene_previous_names = ['video_codec', 'format', 'video_api', 'audio_codec', 'audio_profile', 'video_profile',
'audio_channels', 'screen_size', 'other', 'container', 'language', 'subtitle_language',
'subtitle_language.suffix', 'subtitle_language.prefix']
_scene_previous_tags = ['release-group-prefix']
class ExpectedReleaseGroup(Rule):
"""
Add release_group match from expected_group option
"""
consequence = AppendMatch
properties = {'release_group': [None]}
def enabled(self, context):
return context.get('expected_group')
def when(self, matches, context):
expected_rebulk = Rebulk().defaults(name='release_group')
for expected_group in context.get('expected_group'):
if expected_group.startswith('re:'):
expected_group = expected_group[3:]
expected_group = expected_group.replace(' ', '-')
expected_rebulk.regex(expected_group, abbreviations=[dash], flags=re.IGNORECASE)
else:
expected_rebulk.string(expected_group, ignore_case=True)
matches = expected_rebulk.matches(matches.input_string, context)
return matches
class SceneReleaseGroup(Rule):
"""
Add release_group match in existing matches (scene format).
Something.XViD-ReleaseGroup.mkv
"""
dependency = [TitleFromPosition, ExpectedReleaseGroup]
consequence = AppendMatch
properties = {'release_group': [None]}
def when(self, matches, context):
# If a release_group is found before, ignore this kind of release_group rule.
ret = []
for filepart in marker_sorted(matches.markers.named('path'), matches):
start, end = filepart.span
last_hole = matches.holes(start, end + 1, formatter=clean_groupname,
predicate=lambda hole: cleanup(hole.value), index=-1)
if last_hole:
previous_match = matches.previous(last_hole,
lambda match: not match.private or
match.name in _scene_previous_names,
index=0)
if previous_match and (previous_match.name in _scene_previous_names or
any(tag in previous_match.tags for tag in _scene_previous_tags)) and \
not matches.input_string[previous_match.end:last_hole.start].strip(seps) \
and not int_coercable(last_hole.value.strip(seps)):
last_hole.name = 'release_group'
last_hole.tags = ['scene']
# if hole is inside a group marker with same value, remove [](){} ...
group = matches.markers.at_match(last_hole, lambda marker: marker.name == 'group', 0)
if group:
group.formatter = clean_groupname
if group.value == last_hole.value:
last_hole.start = group.start + 1
last_hole.end = group.end - 1
last_hole.tags = ['anime']
ret.append(last_hole)
return ret
class AnimeReleaseGroup(Rule):
"""
Add release_group match in existing matches (anime format)
...[ReleaseGroup] Something.mkv
"""
dependency = [SceneReleaseGroup, TitleFromPosition]
consequence = AppendMatch
properties = {'release_group': [None]}
def when(self, matches, context):
ret = []
# If a release_group is found before, ignore this kind of release_group rule.
if not matches.named('episode') and not matches.named('season') and matches.named('release_group'):
# This doesn't seem to be an anime
return
for filepart in marker_sorted(matches.markers.named('path'), matches):
# pylint:disable=bad-continuation
empty_group_marker = matches.markers \
.range(filepart.start, filepart.end, lambda marker: marker.name == 'group'
and not matches.range(marker.start, marker.end)
and not int_coercable(marker.value.strip(seps)),
0)
if empty_group_marker:
group = copy.copy(empty_group_marker)
group.marker = False
group.raw_start += 1
group.raw_end -= 1
group.tags = ['anime']
group.name = 'release_group'
ret.append(group)
return ret
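# Editor's note: an illustrative sketch, not part of the vendored file,
# assuming the guessit 2.x API; names are invented and output may vary.
if __name__ == '__main__':
    from guessit import guessit
    # Scene style: the group sits after the last recognised property.
    print(guessit('Show.S01E01.720p.HDTV.x264-GRP.mkv').get('release_group'))
    # Anime style: the group sits in otherwise empty leading brackets.
    print(guessit('[GRP].Show.-.01.[720p].mkv').get('release_group'))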

View file

@@ -0,0 +1,77 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
screen_size property
"""
from rebulk.remodule import re
from rebulk import Rebulk, Rule, RemoveMatch
from ..common.validators import seps_surround
from ..common import dash
def screen_size():
"""
Builder for rebulk object.
:return: Created Rebulk object
:rtype: Rebulk
"""
def conflict_solver(match, other):
"""
Conflict solver for most screen_size.
"""
if other.name == 'screen_size':
if 'resolution' in other.tags:
# Hack ("chtouille") to solve the conflict when a "720 x 432" string also matches the 720p pattern
int_value = _digits_re.findall(match.raw)[-1]
if other.value.startswith(int_value):
return match
return other
return '__default__'
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE)
rebulk.defaults(name="screen_size", validator=seps_surround, conflict_solver=conflict_solver)
rebulk.regex(r"(?:\d{3,}(?:x|\*))?360(?:i|p?x?)", value="360p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?368(?:i|p?x?)", value="368p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?480(?:i|p?x?)", value="480p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?576(?:i|p?x?)", value="576p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?720(?:i|p?(?:50|60)?x?)", value="720p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?720(?:p(?:50|60)?x?)", value="720p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?720p?hd", value="720p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?900(?:i|p?x?)", value="900p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?1080i", value="1080i")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?1080p?x?", value="1080p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?1080(?:p(?:50|60)?x?)", value="1080p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?1080p?hd", value="1080p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?2160(?:i|p?x?)", value="4K")
_digits_re = re.compile(r'\d+')
rebulk.defaults(name="screen_size", validator=seps_surround)
rebulk.regex(r'\d{3,}-?(?:x|\*)-?\d{3,}',
formatter=lambda value: 'x'.join(_digits_re.findall(value)),
abbreviations=[dash],
tags=['resolution'],
conflict_solver=lambda match, other: '__default__' if other.name == 'screen_size' else other)
rebulk.rules(ScreenSizeOnlyOne)
return rebulk
class ScreenSizeOnlyOne(Rule):
"""
Keep a single screen_size per filepath part.
"""
consequence = RemoveMatch
def when(self, matches, context):
to_remove = []
for filepart in matches.markers.named('path'):
screensize = list(reversed(matches.range(filepart.start, filepart.end,
lambda match: match.name == 'screen_size')))
if len(screensize) > 1:
to_remove.extend(screensize[1:])
return to_remove
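# Editor's note: an illustrative sketch, not part of the vendored file,
# assuming the guessit 2.x API. A raw '720x432' resolution wins over the bare
# 720p pattern via the conflict solver above; output may vary.
if __name__ == '__main__':
    from guessit import guessit
    for name in ('Movie.720p.mkv', 'Movie.720x432.mkv'):
        print(name, guessit(name).get('screen_size'))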

View file

@@ -0,0 +1,347 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
title property
"""
import re
from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, AppendTags
from rebulk.formatters import formatters
from rebulk.pattern import RePattern
from rebulk.utils import find_all
from .film import FilmTitleRule
from .language import SubtitlePrefixLanguageRule, SubtitleSuffixLanguageRule, SubtitleExtensionRule
from ..common.formatters import cleanup, reorder_title
from ..common.comparators import marker_sorted
from ..common import seps, title_seps, dash
def title():
"""
Builder for rebulk object.
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().rules(TitleFromPosition, PreferTitleWithYear)
def expected_title(input_string, context):
"""
Expected title functional pattern.
:param input_string:
:type input_string:
:param context:
:type context:
:return:
:rtype:
"""
ret = []
for search in context.get('expected_title'):
if search.startswith('re:'):
search = search[3:]
search = search.replace(' ', '-')
matches = RePattern(search, abbreviations=[dash], flags=re.IGNORECASE).matches(input_string, context)
for match in matches:
# Instance of 'list' has no 'span' member (no-member). Seems to be a pylint bug.
# pylint: disable=no-member
ret.append(match.span)
else:
for start in find_all(input_string, search, ignore_case=True):
ret.append((start, start+len(search)))
return ret
rebulk.functional(expected_title, name='title', tags=['expected'],
conflict_solver=lambda match, other: other,
disabled=lambda context: not context.get('expected_title'))
return rebulk
class TitleBaseRule(Rule):
"""
Add title match in existing matches
"""
# pylint:disable=no-self-use,unused-argument
consequence = [AppendMatch, RemoveMatch]
def __init__(self, match_name, match_tags=None, alternative_match_name=None):
super(TitleBaseRule, self).__init__()
self.match_name = match_name
self.match_tags = match_tags
self.alternative_match_name = alternative_match_name
def hole_filter(self, hole, matches):
"""
Filter holes for titles.
:param hole:
:type hole:
:param matches:
:type matches:
:return:
:rtype:
"""
return True
def filepart_filter(self, filepart, matches):
"""
Filter filepart for titles.
:param filepart:
:type filepart:
:param matches:
:type matches:
:return:
:rtype:
"""
return True
def holes_process(self, holes, matches):
"""
process holes
:param holes:
:type holes:
:param matches:
:type matches:
:return:
:rtype:
"""
cropped_holes = []
for hole in holes:
group_markers = matches.markers.named('group')
cropped_holes.extend(hole.crop(group_markers))
return cropped_holes
def is_ignored(self, match):
"""
Ignore matches when scanning for title (hole).
Full word language and countries won't be ignored if they are uppercase.
"""
return not (len(match) > 3 and match.raw.isupper()) and match.name in ['language', 'country', 'episode_details']
def should_keep(self, match, to_keep, matches, filepart, hole, starting):
"""
Check if this match should be accepted when ending or starting a hole.
:param match:
:type match:
:param to_keep:
:type to_keep: list[Match]
:param matches:
:type matches: Matches
:param filepart: the filepart match
:type filepart: Match
:param hole: the hole match
:type hole: Match
:param starting: true if match is starting the hole
:type starting: bool
:return:
:rtype:
"""
if match.name in ['language', 'country']:
# Keep language if exactly matching the hole.
if len(hole.value) == len(match.raw):
return True
# Keep language if other languages exists in the filepart.
outside_matches = filepart.crop(hole)
other_languages = []
for outside in outside_matches:
other_languages.extend(matches.range(outside.start, outside.end,
lambda c_match: c_match.name == match.name and
c_match not in to_keep))
if not other_languages:
return True
return False
def should_remove(self, match, matches, filepart, hole, context):
"""
Check if this match should be removed after being ignored.
:param match:
:param matches:
:param filepart:
:param hole:
:param context:
:return:
"""
if context.get('type') == 'episode' and match.name == 'episode_details':
return False
return True
def check_titles_in_filepart(self, filepart, matches, context):
"""
Find title in filepart (ignoring language)
"""
# pylint:disable=too-many-locals,too-many-branches,too-many-statements
start, end = filepart.span
holes = matches.holes(start, end + 1, formatter=formatters(cleanup, reorder_title),
ignore=self.is_ignored,
predicate=lambda hole: hole.value)
holes = self.holes_process(holes, matches)
for hole in holes:
# pylint:disable=cell-var-from-loop
if not hole or (self.hole_filter and not self.hole_filter(hole, matches)):
continue
to_remove = []
to_keep = []
ignored_matches = matches.range(hole.start, hole.end, self.is_ignored)
if ignored_matches:
for ignored_match in reversed(ignored_matches):
# pylint:disable=undefined-loop-variable
trailing = matches.chain_before(hole.end, seps, predicate=lambda match: match == ignored_match)
if trailing:
should_keep = self.should_keep(ignored_match, to_keep, matches, filepart, hole, False)
if should_keep:
# pylint:disable=unpacking-non-sequence
try:
append, crop = should_keep
except TypeError:
append, crop = should_keep, should_keep
if append:
to_keep.append(ignored_match)
if crop:
hole.end = ignored_match.start
for ignored_match in ignored_matches:
if ignored_match not in to_keep:
starting = matches.chain_after(hole.start, seps,
predicate=lambda match: match == ignored_match)
if starting:
should_keep = self.should_keep(ignored_match, to_keep, matches, filepart, hole, True)
if should_keep:
# pylint:disable=unpacking-non-sequence
try:
append, crop = should_keep
except TypeError:
append, crop = should_keep, should_keep
if append:
to_keep.append(ignored_match)
if crop:
hole.start = ignored_match.end
for match in ignored_matches:
if self.should_remove(match, matches, filepart, hole, context):
to_remove.append(match)
for keep_match in to_keep:
if keep_match in to_remove:
to_remove.remove(keep_match)
if hole and hole.value:
hole.name = self.match_name
hole.tags = self.match_tags
if self.alternative_match_name:
# Split and keep values that can be a title
titles = hole.split(title_seps, lambda match: match.value)
for title_match in list(titles[1:]):
previous_title = titles[titles.index(title_match) - 1]
separator = matches.input_string[previous_title.end:title_match.start]
if len(separator) == 1 and separator == '-' \
and previous_title.raw[-1] not in seps \
and title_match.raw[0] not in seps:
titles[titles.index(title_match) - 1].end = title_match.end
titles.remove(title_match)
else:
title_match.name = self.alternative_match_name
else:
titles = [hole]
return titles, to_remove
def when(self, matches, context):
if matches.named(self.match_name, lambda match: 'expected' in match.tags):
return
fileparts = [filepart for filepart in list(marker_sorted(matches.markers.named('path'), matches))
if not self.filepart_filter or self.filepart_filter(filepart, matches)]
to_remove = []
# Prioritize fileparts containing the year
years_fileparts = []
for filepart in fileparts:
year_match = matches.range(filepart.start, filepart.end, lambda match: match.name == 'year', 0)
if year_match:
years_fileparts.append(filepart)
ret = []
for filepart in fileparts:
try:
years_fileparts.remove(filepart)
except ValueError:
pass
titles = self.check_titles_in_filepart(filepart, matches, context)
if titles:
titles, to_remove_c = titles
ret.extend(titles)
to_remove.extend(to_remove_c)
break
# Add title match in all fileparts containing the year.
for filepart in years_fileparts:
titles = self.check_titles_in_filepart(filepart, matches, context)
if titles:
# pylint:disable=unbalanced-tuple-unpacking
titles, to_remove_c = titles
ret.extend(titles)
to_remove.extend(to_remove_c)
return ret, to_remove
class TitleFromPosition(TitleBaseRule):
"""
Add title match in existing matches
"""
dependency = [FilmTitleRule, SubtitlePrefixLanguageRule, SubtitleSuffixLanguageRule, SubtitleExtensionRule]
properties = {'title': [None], 'alternative_title': [None]}
def __init__(self):
super(TitleFromPosition, self).__init__('title', ['title'], 'alternative_title')
class PreferTitleWithYear(Rule):
"""
Prefer title where filepart contains year.
"""
dependency = TitleFromPosition
consequence = [RemoveMatch, AppendTags(['equivalent-ignore'])]
properties = {'title': [None]}
def when(self, matches, context):
with_year_in_group = []
with_year = []
titles = matches.named('title')
for title_match in titles:
filepart = matches.markers.at_match(title_match, lambda marker: marker.name == 'path', 0)
if filepart:
year_match = matches.range(filepart.start, filepart.end, lambda match: match.name == 'year', 0)
if year_match:
group = matches.markers.at_match(year_match, lambda group: group.name == 'group')
if group:
with_year_in_group.append(title_match)
else:
with_year.append(title_match)
to_tag = []
if with_year_in_group:
title_values = set([title_match.value for title_match in with_year_in_group])
to_tag.extend(with_year_in_group)
elif with_year:
title_values = set([title_match.value for title_match in with_year])
to_tag.extend(with_year)
else:
title_values = set([title_match.value for title_match in titles])
to_remove = []
for title_match in titles:
if title_match.value not in title_values:
to_remove.append(title_match)
return to_remove, to_tag
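# Editor's note: an illustrative sketch, not part of the vendored file,
# assuming the guessit 2.x API. The title is taken from the largest usable
# hole in the filepart, and PreferTitleWithYear favours the filepart that
# also carries the year; the name is invented.
if __name__ == '__main__':
    from guessit import guessit
    print(guessit('Some.Title.2014.720p.BluRay.x264-GRP.mkv').get('title'))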

View file

@@ -0,0 +1,75 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
type property
"""
from rebulk import CustomRule, Rebulk, POST_PROCESS
from rebulk.match import Match
from ...rules.processors import Processors
def _type(matches, value):
"""
Define type match with given value.
:param matches:
:param value:
:return:
"""
matches.append(Match(len(matches.input_string), len(matches.input_string), name='type', value=value))
def type_():
"""
Builder for rebulk object.
:return: Created Rebulk object
:rtype: Rebulk
"""
return Rebulk().rules(TypeProcessor)
class TypeProcessor(CustomRule):
"""
Post processor to find file type based on all others found matches.
"""
priority = POST_PROCESS
dependency = Processors
properties = {'type': ['episode', 'movie']}
def when(self, matches, context): # pylint:disable=too-many-return-statements
option_type = context.get('type', None)
if option_type:
return option_type
episode = matches.named('episode')
season = matches.named('season')
episode_details = matches.named('episode_details')
if episode or season or episode_details:
return 'episode'
film = matches.named('film')
if film:
return 'movie'
year = matches.named('year')
date = matches.named('date')
if date and not year:
return 'episode'
bonus = matches.named('bonus')
if bonus and not year:
return 'episode'
crc32 = matches.named('crc32')
anime_release_group = matches.named('release_group', lambda match: 'anime' in match.tags)
if crc32 and anime_release_group:
return 'episode'
return 'movie'
def then(self, matches, when_response, context):
_type(matches, when_response)
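# Editor's note: an illustrative sketch, not part of the vendored file,
# assuming the guessit 2.x API; names are invented.
if __name__ == '__main__':
    from guessit import guessit
    for name in ('Show.S01E01.mkv', 'Movie.2008.BluRay.mkv'):
        print(name, guessit(name).get('type'))  # episode, then movie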

View file

@@ -0,0 +1,87 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
video_codec and video_profile property
"""
from rebulk.remodule import re
from rebulk import Rebulk, Rule, RemoveMatch
from guessit.rules.common.validators import seps_after, seps_before
from ..common import dash
from ..common.validators import seps_surround
def video_codec():
"""
Builder for rebulk object.
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
rebulk.defaults(name="video_codec")
rebulk.regex(r"Rv\d{2}", value="Real")
rebulk.regex("Mpeg2", value="Mpeg2")
rebulk.regex("DVDivX", "DivX", value="DivX")
rebulk.regex("XviD", value="XviD")
rebulk.regex("[hx]-?264(?:-?AVC(HD)?)?", "MPEG-?4(?:-?AVC(HD)?)", "AVCHD", value="h264")
rebulk.regex("[hx]-?265(?:-?HEVC)?", "HEVC", value="h265")
# http://blog.mediacoderhq.com/h264-profiles-and-levels/
# http://fr.wikipedia.org/wiki/H.264
rebulk.defaults(name="video_profile", validator=seps_surround)
rebulk.regex('10.?bit', 'Hi10P', value='10bit')
rebulk.regex('8.?bit', value='8bit')
rebulk.string('BP', value='BP', tags='video_profile.rule')
rebulk.string('XP', 'EP', value='XP', tags='video_profile.rule')
rebulk.string('MP', value='MP', tags='video_profile.rule')
rebulk.string('HP', 'HiP', value='HP', tags='video_profile.rule')
rebulk.regex('Hi422P', value='Hi422P', tags='video_profile.rule')
rebulk.regex('Hi444PP', value='Hi444PP', tags='video_profile.rule')
rebulk.string('DXVA', value='DXVA', name='video_api')
rebulk.rules(ValidateVideoCodec, VideoProfileRule)
return rebulk
class ValidateVideoCodec(Rule):
"""
Validate video_codec: it must be preceded by separators or a format property, and followed by separators
"""
priority = 64
consequence = RemoveMatch
def when(self, matches, context):
ret = []
for codec in matches.named('video_codec'):
if not seps_before(codec) and \
not matches.at_index(codec.start - 1, lambda match: match.name == 'format'):
ret.append(codec)
continue
if not seps_after(codec):
ret.append(codec)
continue
return ret
class VideoProfileRule(Rule):
"""
Rule to validate video_profile
"""
consequence = RemoveMatch
def when(self, matches, context):
profile_list = matches.named('video_profile', lambda match: 'video_profile.rule' in match.tags)
ret = []
for profile in profile_list:
codec = matches.previous(profile, lambda match: match.name == 'video_codec')
if not codec:
codec = matches.next(profile, lambda match: match.name == 'video_codec')
if not codec:
ret.append(profile)
return ret
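# Editor's note: an illustrative sketch, not part of the vendored file,
# assuming the guessit 2.x API. 'x264' maps to the h264 value and 'Hi10P' to
# the 10bit video_profile; exact output may vary.
if __name__ == '__main__':
    from guessit import guessit
    info = guessit('Show.S01E01.1080p.x264.Hi10P.mkv')
    print(info.get('video_codec'), info.get('video_profile'))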

View file

@@ -0,0 +1,67 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Website property.
"""
from pkg_resources import resource_stream # @UnresolvedImport
from rebulk.remodule import re
from rebulk import Rebulk, Rule, RemoveMatch
from ...reutils import build_or_pattern
def website():
"""
Builder for rebulk object.
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE)
rebulk.defaults(name="website")
tlds = [l.strip().decode('utf-8')
for l in resource_stream('guessit', 'tlds-alpha-by-domain.txt').readlines()
if b'--' not in l][1:]  # All registered domain extensions
safe_tlds = ['com', 'org', 'net'] # For sure a website extension
safe_subdomains = ['www'] # For sure a website subdomain
safe_prefix = ['co', 'com', 'org', 'net']  # Words that, placed before a tld, make the match a sure website
rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
r'\.)+(?:[a-z-]+\.)+(?:'+build_or_pattern(tlds) +
r'))(?:[^a-z0-9]|$)',
children=True)
rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
r'\.)*[a-z-]+\.(?:'+build_or_pattern(safe_tlds) +
r'))(?:[^a-z0-9]|$)',
safe_subdomains=safe_subdomains, safe_tlds=safe_tlds, children=True)
rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
r'\.)*[a-z-]+\.(?:'+build_or_pattern(safe_prefix) +
r'\.)+(?:'+build_or_pattern(tlds) +
r'))(?:[^a-z0-9]|$)',
safe_subdomains=safe_subdomains, safe_prefix=safe_prefix, tlds=tlds, children=True)
class PreferTitleOverWebsite(Rule):
"""
If the found match is more likely a title, remove the website match.
"""
consequence = RemoveMatch
@staticmethod
def valid_followers(match):
"""
Validator for next website matches
"""
return any(name in ['season', 'episode', 'year'] for name in match.names)
def when(self, matches, context):
to_remove = []
for website_match in matches.named('website'):
suffix = matches.next(website_match, PreferTitleOverWebsite.valid_followers, 0)
if suffix:
to_remove.append(website_match)
return to_remove
rebulk.rules(PreferTitleOverWebsite)
return rebulk
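# Editor's note: an illustrative sketch, not part of the vendored file,
# assuming the guessit 2.x API. 'tvu.org.ru' fits the safe_subdomains /
# safe_prefix pattern built above; the file name comes from the test data
# removed later in this commit.
if __name__ == '__main__':
    from guessit import guessit
    print(guessit('Neverwhere.05.Down.Street.[tvu.org.ru].avi').get('website'))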

View file

@@ -1,89 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
import logging
import sys
import os
GREEN_FONT = "\x1B[0;32m"
YELLOW_FONT = "\x1B[0;33m"
BLUE_FONT = "\x1B[0;34m"
RED_FONT = "\x1B[0;31m"
RESET_FONT = "\x1B[0m"
def setup_logging(colored=True, with_time=False, with_thread=False, filename=None, with_lineno=False): # pragma: no cover
"""Set up a nice colored logger as the main application logger."""
class SimpleFormatter(logging.Formatter):
def __init__(self, with_time, with_thread):
self.fmt = (('%(asctime)s ' if with_time else '') +
'%(levelname)-8s ' +
'[%(name)s:%(funcName)s' +
(':%(lineno)s' if with_lineno else '') + ']' +
('[%(threadName)s]' if with_thread else '') +
' -- %(message)s')
logging.Formatter.__init__(self, self.fmt)
class ColoredFormatter(logging.Formatter):
def __init__(self, with_time, with_thread):
self.fmt = (('%(asctime)s ' if with_time else '') +
'-CC-%(levelname)-8s ' +
BLUE_FONT + '[%(name)s:%(funcName)s' +
(':%(lineno)s' if with_lineno else '') + ']' +
RESET_FONT + ('[%(threadName)s]' if with_thread else '') +
' -- %(message)s')
logging.Formatter.__init__(self, self.fmt)
def format(self, record):
modpath = record.name.split('.')
record.mname = modpath[0]
record.mmodule = '.'.join(modpath[1:])
result = logging.Formatter.format(self, record)
if record.levelno == logging.DEBUG:
color = BLUE_FONT
elif record.levelno == logging.INFO:
color = GREEN_FONT
elif record.levelno == logging.WARNING:
color = YELLOW_FONT
else:
color = RED_FONT
result = result.replace('-CC-', color)
return result
if filename is not None:
# make sure we can write to our log file
logdir = os.path.dirname(filename)
if not os.path.exists(logdir):
os.makedirs(logdir)
ch = logging.FileHandler(filename, mode='w')
ch.setFormatter(SimpleFormatter(with_time, with_thread))
else:
ch = logging.StreamHandler()
if colored and sys.platform != 'win32':
ch.setFormatter(ColoredFormatter(with_time, with_thread))
else:
ch.setFormatter(SimpleFormatter(with_time, with_thread))
logging.getLogger().addHandler(ch)

Binary file not shown.

View file

@@ -1,26 +1,3 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
import logging
from guessit.slogging import setup_logging
setup_logging()
logging.disable(logging.INFO)
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name

View file

@@ -1,40 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.test import (test_api, test_autodetect, test_autodetect_all, test_doctests,
test_episode, test_hashes, test_language, test_main,
test_matchtree, test_movie, test_quality, test_utils)
from unittest import TextTestRunner
import logging
def main():
for suite in [test_api.suite, test_autodetect.suite,
test_autodetect_all.suite, test_doctests.suite,
test_episode.suite, test_hashes.suite, test_language.suite,
test_main.suite, test_matchtree.suite, test_movie.suite,
test_quality.suite, test_utils.suite]:
TextTestRunner(verbosity=2).run(suite)
if __name__ == '__main__':
main()

View file

@@ -1,489 +0,0 @@
? Movies/Fear and Loathing in Las Vegas (1998)/Fear.and.Loathing.in.Las.Vegas.720p.HDDVD.DTS.x264-ESiR.mkv
: type: movie
title: Fear and Loathing in Las Vegas
year: 1998
screenSize: 720p
format: HD-DVD
audioCodec: DTS
videoCodec: h264
releaseGroup: ESiR
? Leopard.dmg
: type: unknown
extension: dmg
? Series/Duckman/Duckman - 101 (01) - 20021107 - I, Duckman.avi
: type: episode
series: Duckman
season: 1
episodeNumber: 1
title: I, Duckman
date: 2002-11-07
? Series/Neverwhere/Neverwhere.05.Down.Street.[tvu.org.ru].avi
: type: episode
series: Neverwhere
episodeNumber: 5
title: Down Street
website: tvu.org.ru
? Neverwhere.05.Down.Street.[tvu.org.ru].avi
: type: episode
series: Neverwhere
episodeNumber: 5
title: Down Street
website: tvu.org.ru
? Series/Breaking Bad/Minisodes/Breaking.Bad.(Minisodes).01.Good.Cop.Bad.Cop.WEBRip.XviD.avi
: type: episode
series: Breaking Bad
episodeFormat: Minisode
episodeNumber: 1
title: Good Cop Bad Cop
format: WEBRip
videoCodec: XviD
? Series/Kaamelott/Kaamelott - Livre V - Ep 23 - Le Forfait.avi
: type: episode
series: Kaamelott
episodeNumber: 23
title: Le Forfait
? Movies/The Doors (1991)/09.03.08.The.Doors.(1991).BDRip.720p.AC3.X264-HiS@SiLUHD-English.[sharethefiles.com].mkv
: type: movie
title: The Doors
year: 1991
date: 2008-03-09
format: BluRay
screenSize: 720p
audioCodec: AC3
videoCodec: h264
releaseGroup: HiS@SiLUHD
language: english
website: sharethefiles.com
? Movies/M.A.S.H. (1970)/MASH.(1970).[Divx.5.02][Dual-Subtitulos][DVDRip].ogm
: type: movie
title: M.A.S.H.
year: 1970
videoCodec: DivX
format: DVD
? the.mentalist.501.hdtv-lol.mp4
: type: episode
series: The Mentalist
season: 5
episodeNumber: 1
format: HDTV
releaseGroup: LOL
? the.simpsons.2401.hdtv-lol.mp4
: type: episode
series: The Simpsons
season: 24
episodeNumber: 1
format: HDTV
releaseGroup: LOL
? Homeland.S02E01.HDTV.x264-EVOLVE.mp4
: type: episode
series: Homeland
season: 2
episodeNumber: 1
format: HDTV
videoCodec: h264
releaseGroup: EVOLVE
? /media/Band_of_Brothers-e01-Currahee.mkv
: type: episode
series: Band of Brothers
episodeNumber: 1
title: Currahee
? /media/Band_of_Brothers-x02-We_Stand_Alone_Together.mkv
: type: episode
series: Band of Brothers
bonusNumber: 2
bonusTitle: We Stand Alone Together
? /movies/James_Bond-f21-Casino_Royale-x02-Stunts.mkv
: type: movie
title: Casino Royale
filmSeries: James Bond
filmNumber: 21
bonusNumber: 2
bonusTitle: Stunts
? /TV Shows/new.girl.117.hdtv-lol.mp4
: type: episode
series: New Girl
season: 1
episodeNumber: 17
format: HDTV
releaseGroup: LOL
? The.Office.(US).1x03.Health.Care.HDTV.XviD-LOL.avi
: type: episode
series: The Office (US)
country: US
season: 1
episodeNumber: 3
title: Health Care
format: HDTV
videoCodec: XviD
releaseGroup: LOL
? The_Insider-(1999)-x02-60_Minutes_Interview-1996.mp4
: type: movie
title: The Insider
year: 1999
bonusNumber: 2
bonusTitle: 60 Minutes Interview-1996
? OSS_117--Cairo,_Nest_of_Spies.mkv
: type: movie
title: OSS 117--Cairo, Nest of Spies
? Rush.._Beyond_The_Lighted_Stage-x09-Between_Sun_and_Moon-2002_Hartford.mkv
: type: movie
title: Rush Beyond The Lighted Stage
bonusNumber: 9
bonusTitle: Between Sun and Moon-2002 Hartford
? House.Hunters.International.S56E06.720p.hdtv.x264.mp4
: type: episode
series: House Hunters International
season: 56
episodeNumber: 6
screenSize: 720p
format: HDTV
videoCodec: h264
? White.House.Down.2013.1080p.BluRay.DTS-HD.MA.5.1.x264-PublicHD.mkv
: type: movie
title: White House Down
year: 2013
screenSize: 1080p
format: BluRay
audioCodec: DTS
audioProfile: HDMA
videoCodec: h264
releaseGroup: PublicHD
audioChannels: "5.1"
? White.House.Down.2013.1080p.BluRay.DTSHD.MA.5.1.x264-PublicHD.mkv
: type: movie
title: White House Down
year: 2013
screenSize: 1080p
format: BluRay
audioCodec: DTS
audioProfile: HDMA
videoCodec: h264
releaseGroup: PublicHD
audioChannels: "5.1"
? Hostages.S01E01.Pilot.for.Air.720p.WEB-DL.DD5.1.H.264-NTb.nfo
: type: episodeinfo
series: Hostages
title: Pilot for Air
season: 1
episodeNumber: 1
screenSize: 720p
format: WEB-DL
audioChannels: "5.1"
videoCodec: h264
audioCodec: DolbyDigital
releaseGroup: NTb
? Despicable.Me.2.2013.1080p.BluRay.x264-VeDeTT.nfo
: type: movieinfo
title: Despicable Me 2
year: 2013
screenSize: 1080p
format: BluRay
videoCodec: h264
releaseGroup: VeDeTT
? Le Cinquieme Commando 1971 SUBFORCED FRENCH DVDRiP XViD AC3 Bandix.mkv
: type: movie
audioCodec: AC3
format: DVD
releaseGroup: Bandix
subtitleLanguage: French
title: Le Cinquieme Commando
videoCodec: XviD
year: 1971
? Le Seigneur des Anneaux - La Communauté de l'Anneau - Version Longue - BDRip.mkv
: type: movie
format: BluRay
title: Le Seigneur des Anneaux
? La petite bande (Michel Deville - 1983) VF PAL MP4 x264 AAC.mkv
: type: movie
audioCodec: AAC
language: French
title: La petite bande
videoCodec: h264
year: 1983
? Retour de Flammes (Gregor Schnitzler 2003) FULL DVD.iso
: type: movie
format: DVD
title: Retour de Flammes
type: movie
year: 2003
? A.Common.Title.Special.2014.avi
: type: movie
year: 2014
title: A Common Title Special
? A.Common.Title.2014.Special.avi
: type: episode
year: 2014
series: A Common Title
title: Special
episodeDetails: Special
? A.Common.Title.2014.Special.Edition.avi
: type: movie
year: 2014
title: A Common Title
edition: Special Edition
? Downton.Abbey.2013.Christmas.Special.HDTV.x264-FoV.mp4
: type: episode
year: 2013
series: Downton Abbey
title: Christmas Special
videoCodec: h264
releaseGroup: FoV
format: HDTV
episodeDetails: Special
? Doctor_Who_2013_Christmas_Special.The_Time_of_The_Doctor.HD
: options: -n
type: episode
series: Doctor Who
other: HD
episodeDetails: Special
title: Christmas Special The Time of The Doctor
year: 2013
? Doctor Who 2005 50th Anniversary Special The Day of the Doctor 3.avi
: type: episode
series: Doctor Who
episodeDetails: Special
title: 50th Anniversary Special The Day of the Doctor 3
year: 2005
? Robot Chicken S06-Born Again Virgin Christmas Special HDTV x264.avi
: type: episode
series: Robot Chicken
format: HDTV
season: 6
title: Born Again Virgin Christmas Special
videoCodec: h264
episodeDetails: Special
? Wicked.Tuna.S03E00.Head.To.Tail.Special.HDTV.x264-YesTV
: options: -n
type: episode
series: Wicked Tuna
title: Head To Tail Special
releaseGroup: YesTV
season: 3
episodeNumber: 0
videoCodec: h264
format: HDTV
episodeDetails: Special
? The.Voice.UK.S03E12.HDTV.x264-C4TV
: options: -n
episodeNumber: 12
videoCodec: h264
format: HDTV
series: The Voice (UK)
releaseGroup: C4TV
season: 3
country: United Kingdom
type: episode
? /tmp/star.trek.9/star.trek.9.mkv
: type: movie
title: star trek 9
? star.trek.9.mkv
: type: movie
title: star trek 9
? FlexGet.S01E02.TheName.HDTV.xvid
: options: -n
episodeNumber: 2
format: HDTV
season: 1
series: FlexGet
title: TheName
type: episode
videoCodec: XviD
? FlexGet.S01E02.TheName.HDTV.xvid
: options: -n
episodeNumber: 2
format: HDTV
season: 1
series: FlexGet
title: TheName
type: episode
videoCodec: XviD
? some.series.S03E14.Title.Here.720p
: options: -n
episodeNumber: 14
screenSize: 720p
season: 3
series: some series
title: Title Here
type: episode
? '[the.group] Some.Series.S03E15.Title.Two.720p'
: options: -n
episodeNumber: 15
releaseGroup: the.group
screenSize: 720p
season: 3
series: Some Series
title: Title Two
type: episode
? 'HD 720p: Some series.S03E16.Title.Three'
: options: -n
episodeNumber: 16
other: HD
screenSize: 720p
season: 3
series: Some series
title: Title Three
type: episode
? Something.Season.2.1of4.Ep.Title.HDTV.torrent
: episodeCount: 4
episodeNumber: 1
format: HDTV
season: 2
series: Something
title: Title
type: episode
? Show-A (US) - Episode Title S02E09 hdtv
: options: -n
country: US
episodeNumber: 9
format: HDTV
season: 2
series: Show-A (US)
type: episode
? Jack's.Show.S03E01.blah.1080p
: options: -n
episodeNumber: 1
screenSize: 1080p
season: 3
series: Jack's Show
title: blah
type: episode
? FlexGet.epic
: options: -n
title: FlexGet epic
type: movie
? FlexGet.Apt.1
: options: -n
title: FlexGet Apt 1
type: movie
? FlexGet.aptitude
: options: -n
title: FlexGet aptitude
type: movie
? FlexGet.Step1
: options: -n
title: FlexGet Step1
type: movie
? Movies/El Bosque Animado (1987)/El.Bosque.Animado.[Jose.Luis.Cuerda.1987].[Xvid-Dvdrip-720 * 432].avi
: format: DVD
screenSize: 720x432
title: El Bosque Animado
videoCodec: XviD
year: 1987
type: movie
? Movies/El Bosque Animado (1987)/El.Bosque.Animado.[Jose.Luis.Cuerda.1987].[Xvid-Dvdrip-720x432].avi
: format: DVD
screenSize: 720x432
title: El Bosque Animado
videoCodec: XviD
year: 1987
type: movie
? 2009.shoot.fruit.chan.multi.dvd9.pal
: options: -n
format: DVD
language: mul
other: PAL
title: shoot fruit chan
type: movie
year: 2009
? 2009.shoot.fruit.chan.multi.dvd5.pal
: options: -n
format: DVD
language: mul
other: PAL
title: shoot fruit chan
type: movie
year: 2009
? The.Flash.2014.S01E01.PREAIR.WEBRip.XviD-EVO.avi
: episodeNumber: 1
format: WEBRip
other: Preair
releaseGroup: EVO
season: 1
series: The Flash
type: episode
videoCodec: XviD
year: 2014
? Ice.Lake.Rebels.S01E06.Ice.Lake.Games.720p.HDTV.x264-DHD
: options: -n
episodeNumber: 6
format: HDTV
releaseGroup: DHD
screenSize: 720p
season: 1
series: Ice Lake Rebels
title: Ice Lake Games
type: episode
videoCodec: h264
? The League - S06E10 - Epi Sexy.mkv
: episodeNumber: 10
season: 6
series: The League
title: Epi Sexy
type: episode
? Stay (2005) [1080p]/Stay.2005.1080p.BluRay.x264.YIFY.mp4
: format: BluRay
releaseGroup: YIFY
screenSize: 1080p
title: Stay
type: movie
videoCodec: h264
year: 2005


@@ -1 +0,0 @@
Just a dummy srt file (used for unittests: do not remove!)

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff


@@ -1,187 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit import base_text_type, u
from collections import defaultdict
from unittest import TestCase, TestLoader, TextTestRunner
import shlex
import babelfish
import yaml, logging, sys, os
from os.path import *
def currentPath():
'''Returns the path in which the calling file is located.'''
return dirname(join(os.getcwd(), sys._getframe(1).f_globals['__file__']))
def addImportPath(path):
'''Function that adds the specified path to the import path. The path can be
absolute or relative to the calling file.'''
importPath = abspath(join(currentPath(), path))
sys.path = [importPath] + sys.path
log = logging.getLogger(__name__)
from guessit.plugins import transformers
from guessit.options import get_opts
import guessit
from guessit import *
from guessit.matcher import *
from guessit.fileutils import *
def allTests(testClass):
return TestLoader().loadTestsFromTestCase(testClass)
class TestGuessit(TestCase):
def checkMinimumFieldsCorrect(self, filename, filetype=None, remove_type=True,
exclude_files=None):
groundTruth = yaml.load(load_file_in_same_dir(__file__, filename))
def guess_func(string, options=None):
return guess_file_info(string, options=options, type=filetype)
return self.checkFields(groundTruth, guess_func, remove_type, exclude_files)
def checkFields(self, groundTruth, guess_func, remove_type=True,
exclude_files=None):
total = 0
exclude_files = exclude_files or []
fails = defaultdict(list)
additionals = defaultdict(list)
for filename, required_fields in groundTruth.items():
filename = u(filename)
if filename in exclude_files:
continue
log.debug('\n' + '-' * 120)
log.info('Guessing information for file: %s' % filename)
options = required_fields.pop('options') if 'options' in required_fields else None
if options:
args = shlex.split(options)
options = get_opts().parse_args(args)
options = vars(options)
try:
found = guess_func(filename, options)
except Exception as e:
fails[filename].append("An exception has occured in %s: %s" % (filename, e))
log.exception("An exception has occured in %s: %s" % (filename, e))
continue
total = total + 1
# no need for these in the unittests
if remove_type:
try:
del found['type']
except:
pass
for prop in ('container', 'mimetype', 'unidentified'):
if prop in found:
del found[prop]
# props which are list of just 1 elem should be opened for easier writing of the tests
for prop in ('language', 'subtitleLanguage', 'other', 'episodeDetails', 'unidentified'):
value = found.get(prop, None)
if isinstance(value, list) and len(value) == 1:
found[prop] = value[0]
# look for missing properties
for prop, value in required_fields.items():
if prop not in found:
log.debug("Prop '%s' not found in: %s" % (prop, filename))
fails[filename].append("'%s' not found in: %s" % (prop, filename))
continue
# if both properties are strings, do a case-insensitive comparison
if (isinstance(value, base_text_type) and
isinstance(found[prop], base_text_type)):
if value.lower() != found[prop].lower():
log.debug("Wrong prop value [str] for '%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
fails[filename].append("'%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
elif isinstance(value, list) and isinstance(found[prop], list):
if found[prop] and isinstance(found[prop][0], babelfish.Language):
# list of languages
s1 = set(Language.fromguessit(s) for s in value)
s2 = set(found[prop])
else:
# by default we assume list of strings and do a case-insensitive
# comparison on their elements
s1 = set(u(s).lower() for s in value)
s2 = set(u(s).lower() for s in found[prop])
if s1 != s2:
log.debug("Wrong prop value [list] for '%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
fails[filename].append("'%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
elif isinstance(found[prop], babelfish.Language):
try:
if babelfish.Language.fromguessit(value) != found[prop]:
raise ValueError
except:
log.debug("Wrong prop value [Language] for '%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
fails[filename].append("'%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
elif isinstance(found[prop], babelfish.Country):
try:
if babelfish.Country.fromguessit(value) != found[prop]:
raise ValueError
except:
log.debug("Wrong prop value [Country] for '%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
fails[filename].append("'%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
# otherwise, just compare their values directly
else:
if found[prop] != value:
log.debug("Wrong prop value for '%s': expected = '%s' [%s] - received = '%s' [%s]" % (prop, u(value), type(value), u(found[prop]), type(found[prop])))
fails[filename].append("'%s': expected = '%s' [%s] - received = '%s' [%s]" % (prop, u(value), type(value), u(found[prop]), type(found[prop])))
# look for additional properties
for prop, value in found.items():
if prop not in required_fields:
log.debug("Found additional info for prop = '%s': '%s'" % (prop, u(value)))
additionals[filename].append("'%s': '%s'" % (prop, u(value)))
correct = total - len(fails)
log.info('SUMMARY: Guessed correctly %d out of %d filenames' % (correct, total))
for failed_entry, failed_properties in fails.items():
log.error('---- ' + failed_entry + ' ----')
for failed_property in failed_properties:
log.error("FAILED: " + failed_property)
for additional_entry, additional_properties in additionals.items():
log.warning('---- ' + additional_entry + ' ----')
for additional_property in additional_properties:
log.warning("ADDITIONAL: " + additional_property)
self.assertTrue(correct == total,
msg='Correct: %d < Total: %d' % (correct, total))


@@ -1,473 +0,0 @@
IdSubLanguage ISO639 LanguageName UploadEnabled WebEnabled
aar aa Afar, afar 0 0
abk ab Abkhazian 0 0
ace Achinese 0 0
ach Acoli 0 0
ada Adangme 0 0
ady adyghé 0 0
afa Afro-Asiatic (Other) 0 0
afh Afrihili 0 0
afr af Afrikaans 0 0
ain Ainu 0 0
aka ak Akan 0 0
akk Akkadian 0 0
alb sq Albanian 1 1
ale Aleut 0 0
alg Algonquian languages 0 0
alt Southern Altai 0 0
amh am Amharic 0 0
ang English, Old (ca.450-1100) 0 0
apa Apache languages 0 0
ara ar Arabic 1 1
arc Aramaic 0 0
arg an Aragonese 0 0
arm hy Armenian 1 0
arn Araucanian 0 0
arp Arapaho 0 0
art Artificial (Other) 0 0
arw Arawak 0 0
asm as Assamese 0 0
ast Asturian, Bable 0 0
ath Athapascan languages 0 0
aus Australian languages 0 0
ava av Avaric 0 0
ave ae Avestan 0 0
awa Awadhi 0 0
aym ay Aymara 0 0
aze az Azerbaijani 0 0
bad Banda 0 0
bai Bamileke languages 0 0
bak ba Bashkir 0 0
bal Baluchi 0 0
bam bm Bambara 0 0
ban Balinese 0 0
baq eu Basque 1 1
bas Basa 0 0
bat Baltic (Other) 0 0
bej Beja 0 0
bel be Belarusian 0 0
bem Bemba 0 0
ben bn Bengali 1 0
ber Berber (Other) 0 0
bho Bhojpuri 0 0
bih bh Bihari 0 0
bik Bikol 0 0
bin Bini 0 0
bis bi Bislama 0 0
bla Siksika 0 0
bnt Bantu (Other) 0 0
bos bs Bosnian 1 0
bra Braj 0 0
bre br Breton 1 0
btk Batak (Indonesia) 0 0
bua Buriat 0 0
bug Buginese 0 0
bul bg Bulgarian 1 1
bur my Burmese 0 0
byn Blin 0 0
cad Caddo 0 0
cai Central American Indian (Other) 0 0
car Carib 0 0
cat ca Catalan 1 1
cau Caucasian (Other) 0 0
ceb Cebuano 0 0
cel Celtic (Other) 0 0
cha ch Chamorro 0 0
chb Chibcha 0 0
che ce Chechen 0 0
chg Chagatai 0 0
chi zh Chinese 1 1
chk Chuukese 0 0
chm Mari 0 0
chn Chinook jargon 0 0
cho Choctaw 0 0
chp Chipewyan 0 0
chr Cherokee 0 0
chu cu Church Slavic 0 0
chv cv Chuvash 0 0
chy Cheyenne 0 0
cmc Chamic languages 0 0
cop Coptic 0 0
cor kw Cornish 0 0
cos co Corsican 0 0
cpe Creoles and pidgins, English based (Other) 0 0
cpf Creoles and pidgins, French-based (Other) 0 0
cpp Creoles and pidgins, Portuguese-based (Other) 0 0
cre cr Cree 0 0
crh Crimean Tatar 0 0
crp Creoles and pidgins (Other) 0 0
csb Kashubian 0 0
cus Cushitic (Other)' couchitiques, autres langues 0 0
cze cs Czech 1 1
dak Dakota 0 0
dan da Danish 1 1
dar Dargwa 0 0
day Dayak 0 0
del Delaware 0 0
den Slave (Athapascan) 0 0
dgr Dogrib 0 0
din Dinka 0 0
div dv Divehi 0 0
doi Dogri 0 0
dra Dravidian (Other) 0 0
dua Duala 0 0
dum Dutch, Middle (ca.1050-1350) 0 0
dut nl Dutch 1 1
dyu Dyula 0 0
dzo dz Dzongkha 0 0
efi Efik 0 0
egy Egyptian (Ancient) 0 0
eka Ekajuk 0 0
elx Elamite 0 0
eng en English 1 1
enm English, Middle (1100-1500) 0 0
epo eo Esperanto 1 0
est et Estonian 1 1
ewe ee Ewe 0 0
ewo Ewondo 0 0
fan Fang 0 0
fao fo Faroese 0 0
fat Fanti 0 0
fij fj Fijian 0 0
fil Filipino 0 0
fin fi Finnish 1 1
fiu Finno-Ugrian (Other) 0 0
fon Fon 0 0
fre fr French 1 1
frm French, Middle (ca.1400-1600) 0 0
fro French, Old (842-ca.1400) 0 0
fry fy Frisian 0 0
ful ff Fulah 0 0
fur Friulian 0 0
gaa Ga 0 0
gay Gayo 0 0
gba Gbaya 0 0
gem Germanic (Other) 0 0
geo ka Georgian 1 1
ger de German 1 1
gez Geez 0 0
gil Gilbertese 0 0
gla gd Gaelic 0 0
gle ga Irish 0 0
glg gl Galician 1 1
glv gv Manx 0 0
gmh German, Middle High (ca.1050-1500) 0 0
goh German, Old High (ca.750-1050) 0 0
gon Gondi 0 0
gor Gorontalo 0 0
got Gothic 0 0
grb Grebo 0 0
grc Greek, Ancient (to 1453) 0 0
ell el Greek 1 1
grn gn Guarani 0 0
guj gu Gujarati 0 0
gwi Gwich´in 0 0
hai Haida 0 0
hat ht Haitian 0 0
hau ha Hausa 0 0
haw Hawaiian 0 0
heb he Hebrew 1 1
her hz Herero 0 0
hil Hiligaynon 0 0
him Himachali 0 0
hin hi Hindi 1 1
hit Hittite 0 0
hmn Hmong 0 0
hmo ho Hiri Motu 0 0
hrv hr Croatian 1 1
hun hu Hungarian 1 1
hup Hupa 0 0
iba Iban 0 0
ibo ig Igbo 0 0
ice is Icelandic 1 1
ido io Ido 0 0
iii ii Sichuan Yi 0 0
ijo Ijo 0 0
iku iu Inuktitut 0 0
ile ie Interlingue 0 0
ilo Iloko 0 0
ina ia Interlingua (International Auxiliary Language Asso 0 0
inc Indic (Other) 0 0
ind id Indonesian 1 1
ine Indo-European (Other) 0 0
inh Ingush 0 0
ipk ik Inupiaq 0 0
ira Iranian (Other) 0 0
iro Iroquoian languages 0 0
ita it Italian 1 1
jav jv Javanese 0 0
jpn ja Japanese 1 1
jpr Judeo-Persian 0 0
jrb Judeo-Arabic 0 0
kaa Kara-Kalpak 0 0
kab Kabyle 0 0
kac Kachin 0 0
kal kl Kalaallisut 0 0
kam Kamba 0 0
kan kn Kannada 0 0
kar Karen 0 0
kas ks Kashmiri 0 0
kau kr Kanuri 0 0
kaw Kawi 0 0
kaz kk Kazakh 1 0
kbd Kabardian 0 0
kha Khasi 0 0
khi Khoisan (Other) 0 0
khm km Khmer 1 1
kho Khotanese 0 0
kik ki Kikuyu 0 0
kin rw Kinyarwanda 0 0
kir ky Kirghiz 0 0
kmb Kimbundu 0 0
kok Konkani 0 0
kom kv Komi 0 0
kon kg Kongo 0 0
kor ko Korean 1 1
kos Kosraean 0 0
kpe Kpelle 0 0
krc Karachay-Balkar 0 0
kro Kru 0 0
kru Kurukh 0 0
kua kj Kuanyama 0 0
kum Kumyk 0 0
kur ku Kurdish 0 0
kut Kutenai 0 0
lad Ladino 0 0
lah Lahnda 0 0
lam Lamba 0 0
lao lo Lao 0 0
lat la Latin 0 0
lav lv Latvian 1 0
lez Lezghian 0 0
lim li Limburgan 0 0
lin ln Lingala 0 0
lit lt Lithuanian 1 0
lol Mongo 0 0
loz Lozi 0 0
ltz lb Luxembourgish 1 0
lua Luba-Lulua 0 0
lub lu Luba-Katanga 0 0
lug lg Ganda 0 0
lui Luiseno 0 0
lun Lunda 0 0
luo Luo (Kenya and Tanzania) 0 0
lus lushai 0 0
mac mk Macedonian 1 1
mad Madurese 0 0
mag Magahi 0 0
mah mh Marshallese 0 0
mai Maithili 0 0
mak Makasar 0 0
mal ml Malayalam 0 0
man Mandingo 0 0
mao mi Maori 0 0
map Austronesian (Other) 0 0
mar mr Marathi 0 0
mas Masai 0 0
may ms Malay 1 1
mdf Moksha 0 0
mdr Mandar 0 0
men Mende 0 0
mga Irish, Middle (900-1200) 0 0
mic Mi'kmaq 0 0
min Minangkabau 0 0
mis Miscellaneous languages 0 0
mkh Mon-Khmer (Other) 0 0
mlg mg Malagasy 0 0
mlt mt Maltese 0 0
mnc Manchu 0 0
mni Manipuri 0 0
mno Manobo languages 0 0
moh Mohawk 0 0
mol mo Moldavian 0 0
mon mn Mongolian 1 0
mos Mossi 0 0
mwl Mirandese 0 0
mul Multiple languages 0 0
mun Munda languages 0 0
mus Creek 0 0
mwr Marwari 0 0
myn Mayan languages 0 0
myv Erzya 0 0
nah Nahuatl 0 0
nai North American Indian 0 0
nap Neapolitan 0 0
nau na Nauru 0 0
nav nv Navajo 0 0
nbl nr Ndebele, South 0 0
nde nd Ndebele, North 0 0
ndo ng Ndonga 0 0
nds Low German 0 0
nep ne Nepali 0 0
new Nepal Bhasa 0 0
nia Nias 0 0
nic Niger-Kordofanian (Other) 0 0
niu Niuean 0 0
nno nn Norwegian Nynorsk 0 0
nob nb Norwegian Bokmal 0 0
nog Nogai 0 0
non Norse, Old 0 0
nor no Norwegian 1 1
nso Northern Sotho 0 0
nub Nubian languages 0 0
nwc Classical Newari 0 0
nya ny Chichewa 0 0
nym Nyamwezi 0 0
nyn Nyankole 0 0
nyo Nyoro 0 0
nzi Nzima 0 0
oci oc Occitan 1 1
oji oj Ojibwa 0 0
ori or Oriya 0 0
orm om Oromo 0 0
osa Osage 0 0
oss os Ossetian 0 0
ota Turkish, Ottoman (1500-1928) 0 0
oto Otomian languages 0 0
paa Papuan (Other) 0 0
pag Pangasinan 0 0
pal Pahlavi 0 0
pam Pampanga 0 0
pan pa Panjabi 0 0
pap Papiamento 0 0
pau Palauan 0 0
peo Persian, Old (ca.600-400 B.C.) 0 0
per fa Persian 1 1
phi Philippine (Other) 0 0
phn Phoenician 0 0
pli pi Pali 0 0
pol pl Polish 1 1
pon Pohnpeian 0 0
por pt Portuguese 1 1
pra Prakrit languages 0 0
pro Provençal, Old (to 1500) 0 0
pus ps Pushto 0 0
que qu Quechua 0 0
raj Rajasthani 0 0
rap Rapanui 0 0
rar Rarotongan 0 0
roa Romance (Other) 0 0
roh rm Raeto-Romance 0 0
rom Romany 0 0
run rn Rundi 0 0
rup Aromanian 0 0
rus ru Russian 1 1
sad Sandawe 0 0
sag sg Sango 0 0
sah Yakut 0 0
sai South American Indian (Other) 0 0
sal Salishan languages 0 0
sam Samaritan Aramaic 0 0
san sa Sanskrit 0 0
sas Sasak 0 0
sat Santali 0 0
scc sr Serbian 1 1
scn Sicilian 0 0
sco Scots 0 0
sel Selkup 0 0
sem Semitic (Other) 0 0
sga Irish, Old (to 900) 0 0
sgn Sign Languages 0 0
shn Shan 0 0
sid Sidamo 0 0
sin si Sinhalese 1 1
sio Siouan languages 0 0
sit Sino-Tibetan (Other) 0 0
sla Slavic (Other) 0 0
slo sk Slovak 1 1
slv sl Slovenian 1 1
sma Southern Sami 0 0
sme se Northern Sami 0 0
smi Sami languages (Other) 0 0
smj Lule Sami 0 0
smn Inari Sami 0 0
smo sm Samoan 0 0
sms Skolt Sami 0 0
sna sn Shona 0 0
snd sd Sindhi 0 0
snk Soninke 0 0
sog Sogdian 0 0
som so Somali 0 0
son Songhai 0 0
sot st Sotho, Southern 0 0
spa es Spanish 1 1
srd sc Sardinian 0 0
srr Serer 0 0
ssa Nilo-Saharan (Other) 0 0
ssw ss Swati 0 0
suk Sukuma 0 0
sun su Sundanese 0 0
sus Susu 0 0
sux Sumerian 0 0
swa sw Swahili 1 0
swe sv Swedish 1 1
syr Syriac 1 0
tah ty Tahitian 0 0
tai Tai (Other) 0 0
tam ta Tamil 0 0
tat tt Tatar 0 0
tel te Telugu 0 0
tem Timne 0 0
ter Tereno 0 0
tet Tetum 0 0
tgk tg Tajik 0 0
tgl tl Tagalog 1 1
tha th Thai 1 1
tib bo Tibetan 0 0
tig Tigre 0 0
tir ti Tigrinya 0 0
tiv Tiv 0 0
tkl Tokelau 0 0
tlh Klingon 0 0
tli Tlingit 0 0
tmh Tamashek 0 0
tog Tonga (Nyasa) 0 0
ton to Tonga (Tonga Islands) 0 0
tpi Tok Pisin 0 0
tsi Tsimshian 0 0
tsn tn Tswana 0 0
tso ts Tsonga 0 0
tuk tk Turkmen 0 0
tum Tumbuka 0 0
tup Tupi languages 0 0
tur tr Turkish 1 1
tut Altaic (Other) 0 0
tvl Tuvalu 0 0
twi tw Twi 0 0
tyv Tuvinian 0 0
udm Udmurt 0 0
uga Ugaritic 0 0
uig ug Uighur 0 0
ukr uk Ukrainian 1 1
umb Umbundu 0 0
und Undetermined 0 0
urd ur Urdu 1 0
uzb uz Uzbek 0 0
vai Vai 0 0
ven ve Venda 0 0
vie vi Vietnamese 1 1
vol vo Volapük 0 0
vot Votic 0 0
wak Wakashan languages 0 0
wal Walamo 0 0
war Waray 0 0
was Washo 0 0
wel cy Welsh 0 0
wen Sorbian languages 0 0
wln wa Walloon 0 0
wol wo Wolof 0 0
xal Kalmyk 0 0
xho xh Xhosa 0 0
yao Yao 0 0
yap Yapese 0 0
yid yi Yiddish 0 0
yor yo Yoruba 0 0
ypk Yupik languages 0 0
zap Zapotec 0 0
zen Zenaga 0 0
zha za Zhuang 0 0
znd Zande 0 0
zul zu Zulu 0 0
zun Zuni 0 0
rum ro Romanian 1 1
pob pb Brazilian 1 1


@@ -0,0 +1,3 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name


@@ -0,0 +1,83 @@
# Multiple input strings having same expected results can be chained.
# Use - marker to check inputs that should not match results.
? +MP3
? +lame
? +lame3.12
? +lame3.100
: audio_codec: MP3
? +DolbyDigital
? +DD
? +Dolby Digital
: audio_codec: DolbyDigital
? +DolbyAtmos
? +Dolby Atmos
? +Atmos
? -Atmosphere
: audio_codec: DolbyAtmos
? +AAC
: audio_codec: AAC
? +AC3
: audio_codec: AC3
? +Flac
: audio_codec: FLAC
? +DTS
: audio_codec: DTS
? +True-HD
? +trueHD
: audio_codec: TrueHD
? +DTS-HD
: audio_codec: DTS
audio_profile: HD
? +DTS-HDma
: audio_codec: DTS
audio_profile: HDMA
? +AC3-hq
: audio_codec: AC3
audio_profile: HQ
? +AAC-HE
: audio_codec: AAC
audio_profile: HE
? +AAC-LC
: audio_codec: AAC
audio_profile: LC
? +AAC2.0
: audio_codec: AAC
audio_channels: '2.0'
? +7.1
? +7ch
? +8ch
: audio_channels: '7.1'
? +5.1
? +5ch
? +6ch
: audio_channels: '5.1'
? +2ch
? +2.0
? +stereo
: audio_channels: '2.0'
? +1ch
? +mono
: audio_channels: '1.0'
? DD5.1
: audio_codec: DolbyDigital
audio_channels: '5.1'
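
These yml files are guessit 2's new data-driven test fixtures: each "?" line is an input string, the ":" mapping that follows lists the expected properties, and a leading "+" or "-" marks inputs that must or must not match. A minimal sketch of checking one fixture by hand through the public guessit 2 entry point (expected values taken from the DD5.1 fixture above):

from guessit import guessit

# Per the last audio_codec fixture above, "DD5.1" should yield
# both a codec and a channel layout.
match = guessit('DD5.1')
assert match['audio_codec'] == 'DolbyDigital'
assert match['audio_channels'] == '5.1'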


@@ -0,0 +1,9 @@
# Multiple input strings having same expected results can be chained.
# Use - marker to check inputs that should not match results.
? Movie Title-x01-Other Title.mkv
? Movie Title-x01-Other Title
? directory/Movie Title-x01-Other Title/file.mkv
: title: Movie Title
bonus_title: Other Title
bonus: 1


@@ -0,0 +1,10 @@
# Multiple input strings having same expected results can be chained.
# Use - marker to check inputs that should not match results.
? cd 1of3
: cd: 1
cd_count: 3
? Some.Title-DVDRIP-x264-CDP
: cd: !!null
release_group: CDP
video_codec: h264
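
The "!!null" expectation above asserts the absence of a property: "CDP" must be read as a release group, never as a CD number. A quick sketch of the same check through the guessit 2 API:

from guessit import guessit

# Per the fixture above, CDP is a release group, not a cd marker.
match = guessit('Some.Title-DVDRIP-x264-CDP')
assert match.get('cd') is None
assert match['release_group'] == 'CDP'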


@@ -0,0 +1,10 @@
# Multiple input strings having same expected results can be chained.
# Use - marker to check inputs that should not match results.
? Us.this.is.title
? this.is.title.US
: country: US
title: this is title
? This.is.us.title
: title: This is us title


@@ -0,0 +1,50 @@
# Multiple input strings having same expected results can be chained.
# Use - marker to check inputs that should not match results.
? +09.03.08
? +09.03.2008
? +2008.03.09
: date: 2008-03-09
? +31.01.15
? +31.01.2015
? +15.01.31
? +2015.01.31
: date: 2015-01-31
? +01.02.03
: date: 2003-02-01
? +01.02.03
: options: --date-year-first
date: 2001-02-03
? +01.02.03
: options: --date-day-first
date: 2003-02-01
? 1919
? 2030
: !!map {}
? 2029
: year: 2029
? (1920)
: year: 1920
? 2012
: year: 2012
? 2011 2013 (2012) (2015) # first marked year is guessed.
: title: "2011 2013"
year: 2012
? 2012 2009 S01E02 2015 # If no year is marked, the second one is guessed.
: title: "2012"
year: 2009
episode_title: "2015"
? Something 2 mar 2013)
: title: Something
date: 2013-03-02
type: episode
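
Ambiguous all-numeric dates are pinned down by the --date-year-first and --date-day-first options used in the fixtures above. A small sketch, assuming the dict-style option keys mirror the CLI flag names (date_year_first, date_day_first):

from guessit import guessit

# Default reading of the ambiguous "01.02.03", per the fixture above.
print(guessit('01.02.03')['date'])  # datetime.date(2003, 2, 1)

# Year-first reading, equivalent to passing --date-year-first on the CLI.
print(guessit('01.02.03', {'date_year_first': True})['date'])  # datetime.date(2001, 2, 3)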


@@ -0,0 +1,25 @@
# Multiple input strings having same expected results can be chained.
# Use - marker to check inputs that should not match results.
? Director's cut
? Edition Director's cut
: edition: Director's cut
? Collector
? Collector Edition
? Edition Collector
: edition: Collector Edition
? Special Edition
? Edition Special
? -Special
: edition: Special Edition
? Criterion Edition
? Edition Criterion
? -Criterion
: edition: Criterion Edition
? Deluxe
? Deluxe Edition
? Edition Deluxe
: edition: Deluxe Edition


@@ -0,0 +1,247 @@
# Multiple input strings having same expected results can be chained.
# Use - marker to check inputs that should not match results.
? +2x5
? +2X5
? +02x05
? +2X05
? +02x5
? S02E05
? s02e05
? s02e5
? s2e05
? s02ep05
? s2EP5
? -s03e05
? -s02e06
? -3x05
? -2x06
: season: 2
episode: 5
? "+0102"
? "+102"
: season: 1
episode: 2
? "0102 S03E04"
? "S03E04 102"
: season: 3
episode: 4
? +serie Saison 2 other
? +serie Season 2 other
? +serie Saisons 2 other
? +serie Seasons 2 other
? +serie Serie 2 other
? +serie Series 2 other
? +serie Season Two other
? +serie Season II other
: season: 2
? Some Series.S02E01.Episode.title.mkv
? Some Series/Season 02/E01-Episode title.mkv
? Some Series/Season 02/Some Series-E01-Episode title.mkv
? Some Dummy Directory/Season 02/Some Series-E01-Episode title.mkv
? -Some Dummy Directory/Season 02/E01-Episode title.mkv
? Some Series/Unsafe Season 02/Some Series-E01-Episode title.mkv
? -Some Series/Unsafe Season 02/E01-Episode title.mkv
? Some Series/Season 02/E01-Episode title.mkv
? Some Series/ Season 02/E01-Episode title.mkv
? Some Dummy Directory/Some Series S02/E01-Episode title.mkv
? Some Dummy Directory/S02 Some Series/E01-Episode title.mkv
: title: Some Series
episode_title: Episode title
season: 2
episode: 1
? Some Series.S02E01.mkv
? Some Series/Season 02/E01.mkv
? Some Series/Season 02/Some Series-E01.mkv
? Some Dummy Directory/Season 02/Some Series-E01.mkv
? -Some Dummy Directory/Season 02/E01.mkv
? Some Series/Unsafe Season 02/Some Series-E01.mkv
? -Some Series/Unsafe Season 02/E01.mkv
? Some Series/Season 02/E01.mkv
? Some Series/ Season 02/E01.mkv
? Some Dummy Directory/Some Series S02/E01-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA.mkv
: title: Some Series
season: 2
episode: 1
? Some Series S03E01E02
: title: Some Series
season: 3
episode: [1, 2]
? Some Series S01S02S03
? Some Series S01-02-03
? Some Series S01 S02 S03
? Some Series S01 02 03
: title: Some Series
season: [1, 2, 3]
? Some Series E01E02E03
? Some Series E01-02-03
? Some Series E01-03
? Some Series E01 E02 E03
? Some Series E01 02 03
: title: Some Series
episode: [1, 2, 3]
? Some Series E01E02E04
? Some Series E01 E02 E04
? Some Series E01 02 04
: title: Some Series
episode: [1, 2, 4]
? Some Series E01-02-04
? Some Series E01-04
? Some Series E01-04
: title: Some Series
episode: [1, 2, 3, 4]
? Some Series E01-02-E04
: title: Some Series
episode: [1, 2, 3, 4]
? Episode 3
? -Episode III
: episode: 3
? Episode 3
? Episode III
: options: -t episode
episode: 3
? -A very special movie
: episode_details: Special
? A very special episode
: options: -t episode
episode_details: Special
? 12 Monkeys\Season 01\Episode 05\12 Monkeys - S01E05 - The Night Room.mkv
: container: mkv
title: 12 Monkeys
episode: 5
season: 1
? S03E02.X.1080p
: episode: 2
screen_size: 1080p
season: 3
? Something 1 x 2-FlexGet
: options: -t episode
title: Something
season: 1
episode: 2
episode_title: FlexGet
? Show.Name.-.Season.1.to.3.-.Mp4.1080p
? Show.Name.-.Season.1~3.-.Mp4.1080p
? Show.Name.-.Saison.1.a.3.-.Mp4.1080p
: container: MP4
screen_size: 1080p
season:
- 1
- 2
- 3
title: Show Name
? Show.Name.Season.1.3&5.HDTV.XviD-GoodGroup[SomeTrash]
? Show.Name.Season.1.3 and 5.HDTV.XviD-GoodGroup[SomeTrash]
: format: HDTV
release_group: GoodGroup[SomeTrash]
season:
- 1
- 3
- 5
title: Show Name
type: episode
video_codec: XviD
? Show.Name.Season.1.2.3-5.HDTV.XviD-GoodGroup[SomeTrash]
? Show.Name.Season.1.2.3~5.HDTV.XviD-GoodGroup[SomeTrash]
? Show.Name.Season.1.2.3 to 5.HDTV.XviD-GoodGroup[SomeTrash]
: format: HDTV
release_group: GoodGroup[SomeTrash]
season:
- 1
- 2
- 3
- 4
- 5
title: Show Name
type: episode
video_codec: XviD
? The.Get.Down.S01EP01.FRENCH.720p.WEBRIP.XVID-STR
: episode: 1
format: WEBRip
language: fr
release_group: STR
screen_size: 720p
season: 1
title: The Get Down
type: episode
video_codec: XviD
? My.Name.Is.Earl.S01E01-S01E21.SWE-SUB
: episode:
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
season: 1
subtitle_language: sv
title: My Name Is Earl
type: episode
? Show.Name.Season.4.Episodes.1-12
: episode:
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
season: 4
title: Show Name
type: episode
? show name s01.to.s04
: season:
- 1
- 2
- 3
- 4
title: show name
type: episode
? epi
: options: -t episode
title: epi
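
Several fixtures above only match under "options: -t episode", i.e. with an explicit type hint. A hedged sketch of passing the same hint through the API (assuming the "type" option key corresponds to the -t CLI flag):

from guessit import guessit

# Without a hint, a roman numeral is not an episode number
# (see the "-Episode III" fixture above); with the hint it is.
print(guessit('Episode III'))                       # no 'episode' property
print(guessit('Episode III', {'type': 'episode'}))  # episode: 3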


@@ -0,0 +1,9 @@
# Multiple input strings having same expected results can be chained.
# Use - marker to check inputs that should not match results.
? Film Title-f01-Series Title.mkv
? Film Title-f01-Series Title
? directory/Film Title-f01-Series Title/file.mkv
: title: Series Title
film_title: Film Title
film: 1


@@ -0,0 +1,112 @@
# Multiple input strings having same expected results can be chained.
# Use - marker to check inputs that should not match results.
? +VHS
? +VHSRip
? +VHS-Rip
? +VhS_rip
? +VHS.RIP
? -VHSAnythingElse
? -SomeVHS stuff
? -VH
? -VHx
? -VHxRip
: format: VHS
? +Cam
? +CamRip
? +CaM Rip
? +Cam_Rip
? +cam.rip
: format: Cam
? +Telesync
? +TS
? +HD TS
? -Hd.Ts # ts file extension
? -HD.TS # ts file extension
? +Hd-Ts
: format: Telesync
? +Workprint
? +workPrint
? +WorkPrint
? +WP
? -Work Print
: format: Workprint
? +Telecine
? +teleCine
? +TC
? -Tele Cine
: format: Telecine
? +PPV
? +ppv-rip
: format: PPV
? -TV
? +SDTV
? +SDTVRIP
? +Rip sd tv
? +TvRip
? +Rip TV
: format: TV
? +DVB
? +DVB-Rip
? +DvBRiP
? +pdTV
? +Pd Tv
: format: DVB
? +DVD
? +DVD-RIP
? +video ts
? +DVDR
? +DVD 9
? +dvd 5
? -dvd ts
: format: DVD
-format: ts
? +HDTV
? +tv rip hd
? +HDtv Rip
? +HdRip
: format: HDTV
? +VOD
? +VodRip
? +vod rip
: format: VOD
? +webrip
? +Web Rip
: format: WEBRip
? +webdl
? +Web DL
? +webHD
? +WEB hd
? +web
: format: WEB-DL
? +HDDVD
? +hd dvd
? +hdDvdRip
: format: HD-DVD
? +BluRay
? +BluRay rip
? +BD
? +BR
? +BDRip
? +BR rip
? +BD5
? +BD9
? +BD25
? +bd50
: format: BluRay
? XVID.NTSC.DVDR.nfo
: format: DVD


@@ -0,0 +1,39 @@
# Multiple input strings having same expected results can be chained.
# Use - marker to check inputs that should not match results.
? +English
? .ENG.
: language: English
? +French
: language: French
? +SubFrench
? +SubFr
? +STFr
? ST.FR
: subtitle_language: French
? +ENG.-.sub.FR
? ENG.-.FR Sub
? +ENG.-.SubFR
? +ENG.-.FRSUB
? +ENG.-.FRSUBS
? +ENG.-.FR-SUBS
: language: English
subtitle_language: French
? "{Fr-Eng}.St{Fr-Eng}"
? "Le.Prestige[x264.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv"
: language: [French, English]
subtitle_language: [French, English]
? +ENG.-.sub.SWE
? ENG.-.SWE Sub
? +ENG.-.SubSWE
? +ENG.-.SWESUB
? +ENG.-.sub.SV
? ENG.-.SV Sub
? +ENG.-.SubSV
? +ENG.-.SVSUB
: language: English
subtitle_language: Swedish
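
Note that guessit 2 returns language and subtitle_language as babelfish Language objects, which these fixtures spell out by English name (French, English, Swedish). An illustrative check against the Le.Prestige fixture above:

from guessit import guessit

match = guessit('Le.Prestige[x264.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv')
# Both properties are lists of babelfish.Language objects (French, English).
print(match['language'])
print(match['subtitle_language'])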


@@ -0,0 +1,137 @@
# Multiple input strings having same expected results can be chained.
# Use - marker to check inputs that should not match results.
? +DVDSCR
? +DVDScreener
? +DVD-SCR
? +DVD Screener
? +DVD AnythingElse Screener
? -DVD AnythingElse SCR
: other: Screener
? +AudioFix
? +AudioFixed
? +Audio Fix
? +Audio Fixed
: other: AudioFix
? +SyncFix
? +SyncFixed
? +Sync Fix
? +Sync Fixed
: other: SyncFix
? +DualAudio
? +Dual Audio
: other: DualAudio
? +ws
? +WideScreen
? +Wide Screen
: other: WideScreen
? +NF
? +Netflix
: other: Netflix
# Fix and Real must be surrounded by other properties to be matched.
? DVD.Real.XViD
? DVD.fix.XViD
? -DVD.Real
? -DVD.Fix
? -Real.XViD
? -Fix.XViD
: other: Proper
proper_count: 1
? -DVD.BlablaBla.Fix.Blablabla.XVID
? -DVD.BlablaBla.Fix.XVID
? -DVD.Fix.Blablabla.XVID
: other: Proper
proper_count: 1
? DVD.Real.PROPER.REPACK
: other: Proper
proper_count: 3
? Proper
? +Repack
? +Rerip
: other: Proper
proper_count: 1
? XViD.Fansub
: other: Fansub
? XViD.Fastsub
: other: Fastsub
? +Season Complete
? -Complete
: other: Complete
? R5
? RC
: other: R5
? PreAir
? Pre Air
: other: Preair
? Screener
: other: Screener
? Remux
: other: Remux
? 3D
: other: 3D
? HD
: other: HD
? mHD # ??
: other: mHD
? HDLight
: other: HDLight
? HQ
: other: HQ
? ddc
: other: DDC
? hr
: other: HR
? PAL
: other: PAL
? SECAM
: other: SECAM
? NTSC
: other: NTSC
? CC
: other: CC
? LD
: other: LD
? MD
: other: MD
? -The complete movie
: other: Complete
? +The complete movie
: title: The complete movie
? +AC3-HQ
: audio_profile: HQ
? Other-HQ
: other: HQ
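
The proper_count fixtures above encode how repacks stack: every Proper/Repack/Rerip-style token increments a counter. A short sketch, with the expected value taken from the fixture above:

from guessit import guessit

# Three proper markers ("Real", "PROPER", "REPACK") in one name.
match = guessit('DVD.Real.PROPER.REPACK')
assert match['proper_count'] == 3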


@@ -0,0 +1,18 @@
# Multiple input strings having same expected results can be chained.
# Use - marker to check inputs that should not match results.
? Filename Part 3.mkv
? Filename Part III.mkv
? Filename Part Three.mkv
? Filename Part Trois.mkv
: title: Filename
part: 3
? Part 3
? Part III
? Part Three
? Part Trois
? Part3
: part: 3
? -Something.Apt.1
: part: 1


@@ -0,0 +1,8 @@
# Multiple input strings having same expected results can be chained.
# Use - marker to check inputs that should not match results.
# Prefer information from the last path component.
? Some movie (2000)/Some movie (2001).mkv
? Some movie (2001)/Some movie.mkv
: year: 2001
container: mkv
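
When both the directory and the file name carry information, the component closest to the file wins, as the two fixtures above encode. Illustratively:

from guessit import guessit

# The year in the file name overrides the year in the directory name.
assert guessit('Some movie (2000)/Some movie (2001).mkv')['year'] == 2001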

Some files were not shown because too many files have changed in this diff