Updates vendored subliminal to 2.1.0

Updates rarfile to 3.1
Updates stevedore to 3.5.0
Updates appdirs to 1.4.4
Updates click to 8.1.3
Updates decorator to 5.1.1
Updates dogpile.cache to 1.1.8
Updates pbr to 5.11.0
Updates pysrt to 1.1.2
Updates pytz to 2022.6
Adds importlib-metadata version 3.1.1
Adds typing-extensions version 4.1.1
Adds zipp version 3.11.0
This commit is contained in:
Labrys of Knossos 2022-11-29 00:08:39 -05:00
commit f05b09f349
694 changed files with 16621 additions and 11056 deletions

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
__title__ = 'subliminal'
__version__ = '2.0.5'
__version__ = '2.1.0'
__short_version__ = '.'.join(__version__.split('.')[:2])
__author__ = 'Antoine Bertin'
__license__ = 'MIT'

View file

@ -1,7 +1,9 @@
# -*- coding: utf-8 -*-
import datetime
import six
from dogpile.cache import make_region
from dogpile.cache.util import function_key_generator
#: Expiration time for show caching
SHOW_EXPIRATION_TIME = datetime.timedelta(weeks=3).total_seconds()
@ -13,4 +15,23 @@ EPISODE_EXPIRATION_TIME = datetime.timedelta(days=3).total_seconds()
REFINER_EXPIRATION_TIME = datetime.timedelta(weeks=1).total_seconds()
region = make_region()
def _to_native_str(value):
    """Convert `value` to the native string type of the running interpreter.

    On Python 2 the native string type is bytes: text is encoded as UTF-8 and any other value is passed to
    ``six.binary_type``. On Python 3 the native string type is text: bytes are decoded as UTF-8 and any other
    value is passed to ``six.text_type``.

    :param value: the value to convert.
    :return: `value` as a native string.
    :rtype: str

    """
    if six.PY2:
        # native strings are bytes here, so only unicode text needs encoding
        return value.encode('utf-8') if isinstance(value, six.text_type) else six.binary_type(value)

    # native strings are unicode here, so only bytes need decoding
    return value.decode('utf-8') if isinstance(value, six.binary_type) else six.text_type(value)
def to_native_str_key_generator(namespace, fn, to_str=_to_native_str):
    """Build a cache key generator whose key parts are converted with `to_str`.

    This only forwards its arguments to dogpile's ``function_key_generator``, replacing that function's
    string conversion with :func:`_to_native_str` by default.

    :param namespace: the namespace forwarded to ``function_key_generator``.
    :param fn: the function forwarded to ``function_key_generator``.
    :param to_str: callable used to convert values to strings.
    :return: whatever ``function_key_generator`` returns for these arguments.

    """
    key_generator = function_key_generator(namespace, fn, to_str)
    return key_generator
region = make_region(function_key_generator=to_native_str_key_generator)

View file

@ -163,6 +163,26 @@ class Config(object):
for k, v in config.items():
self.config.set(provider, k, v)
@property
def refiner_configs(self):
    """Refiner configurations read from the config, as a dict of option dicts keyed by refiner name.

    Only refiners listed by ``refiner_manager`` that have their own section in the config are included.
    """
    return {
        refiner.name: dict(self.config.items(refiner.name))
        for refiner in refiner_manager
        if self.config.has_section(refiner.name)
    }

@refiner_configs.setter
def refiner_configs(self, value):
    for refiner_name, options in value.items():
        # a refiner needs its own section before options can be written into it
        if not self.config.has_section(refiner_name):
            self.config.add_section(refiner_name)

        # copy every option of this refiner into its section
        for option, option_value in options.items():
            self.config.set(refiner_name, option, option_value)
class LanguageParamType(click.ParamType):
""":class:`~click.ParamType` for languages that returns a :class:`~babelfish.language.Language`"""
@ -174,6 +194,7 @@ class LanguageParamType(click.ParamType):
except BabelfishError:
self.fail('%s is not a valid language' % value)
LANGUAGE = LanguageParamType()
@ -202,6 +223,7 @@ class AgeParamType(click.ParamType):
return timedelta(**{k: int(v) for k, v in match.groupdict(0).items()})
AGE = AgeParamType()
PROVIDER = click.Choice(sorted(provider_manager.names()))
@ -219,13 +241,13 @@ config_file = 'config.ini'
@click.option('--legendastv', type=click.STRING, nargs=2, metavar='USERNAME PASSWORD', help='LegendasTV configuration.')
@click.option('--opensubtitles', type=click.STRING, nargs=2, metavar='USERNAME PASSWORD',
help='OpenSubtitles configuration.')
@click.option('--subscenter', type=click.STRING, nargs=2, metavar='USERNAME PASSWORD', help='SubsCenter configuration.')
@click.option('--omdb', type=click.STRING, nargs=1, metavar='APIKEY', help='OMDB API key.')
@click.option('--cache-dir', type=click.Path(writable=True, file_okay=False), default=dirs.user_cache_dir,
show_default=True, expose_value=True, help='Path to the cache directory.')
@click.option('--debug', is_flag=True, help='Print useful information for debugging subliminal and for reporting bugs.')
@click.version_option(__version__)
@click.pass_context
def subliminal(ctx, addic7ed, legendastv, opensubtitles, subscenter, cache_dir, debug):
def subliminal(ctx, addic7ed, legendastv, opensubtitles, omdb, cache_dir, debug):
"""Subtitles, faster than your thoughts."""
# create cache directory
try:
@ -245,16 +267,23 @@ def subliminal(ctx, addic7ed, legendastv, opensubtitles, subscenter, cache_dir,
logging.getLogger('subliminal').addHandler(handler)
logging.getLogger('subliminal').setLevel(logging.DEBUG)
ctx.obj = {
'provider_configs': {},
'refiner_configs': {}
}
# provider configs
ctx.obj = {'provider_configs': {}}
if addic7ed:
ctx.obj['provider_configs']['addic7ed'] = {'username': addic7ed[0], 'password': addic7ed[1]}
if legendastv:
ctx.obj['provider_configs']['legendastv'] = {'username': legendastv[0], 'password': legendastv[1]}
if opensubtitles:
ctx.obj['provider_configs']['opensubtitles'] = {'username': opensubtitles[0], 'password': opensubtitles[1]}
if subscenter:
ctx.obj['provider_configs']['subscenter'] = {'username': subscenter[0], 'password': subscenter[1]}
ctx.obj['provider_configs']['opensubtitlesvip'] = {'username': opensubtitles[0], 'password': opensubtitles[1]}
# refiner configs
if omdb:
ctx.obj['refiner_configs']['omdb'] = {'apikey': omdb}
@subliminal.command()
@ -324,8 +353,12 @@ def download(obj, provider, refiner, language, age, directory, encoding, single,
continue
if not force:
video.subtitle_languages |= set(search_external_subtitles(video.name, directory=directory).values())
refine(video, episode_refiners=refiner, movie_refiners=refiner, embedded_subtitles=not force)
videos.append(video)
if check_video(video, languages=language, age=age, undefined=single):
refine(video, episode_refiners=refiner, movie_refiners=refiner,
refiner_configs=obj['refiner_configs'],
embedded_subtitles=not force, providers=provider, languages=language)
videos.append(video)
continue
# directories
@ -341,7 +374,9 @@ def download(obj, provider, refiner, language, age, directory, encoding, single,
video.subtitle_languages |= set(search_external_subtitles(video.name,
directory=directory).values())
if check_video(video, languages=language, age=age, undefined=single):
refine(video, episode_refiners=refiner, movie_refiners=refiner, embedded_subtitles=not force)
refine(video, episode_refiners=refiner, movie_refiners=refiner,
refiner_configs=obj['refiner_configs'], embedded_subtitles=not force,
providers=provider, languages=language)
videos.append(video)
else:
ignored_videos.append(video)
@ -357,7 +392,9 @@ def download(obj, provider, refiner, language, age, directory, encoding, single,
if not force:
video.subtitle_languages |= set(search_external_subtitles(video.name, directory=directory).values())
if check_video(video, languages=language, age=age, undefined=single):
refine(video, episode_refiners=refiner, movie_refiners=refiner, embedded_subtitles=not force)
refine(video, episode_refiners=refiner, movie_refiners=refiner,
refiner_configs=obj['refiner_configs'], embedded_subtitles=not force,
providers=provider, languages=language)
videos.append(video)
else:
ignored_videos.append(video)

View file

@ -6,18 +6,17 @@ import io
import itertools
import logging
import operator
import os.path
import socket
import os
from babelfish import Language, LanguageReverseError
from guessit import guessit
from rarfile import NotRarFile, RarCannotExec, RarFile
import requests
from rarfile import BadRarFile, NotRarFile, RarCannotExec, RarFile, Error, is_rarfile
from zipfile import BadZipfile
from .extensions import provider_manager, refiner_manager
from .extensions import provider_manager, default_providers, refiner_manager
from .score import compute_score as default_compute_score
from .subtitle import SUBTITLE_EXTENSIONS, get_subtitle_path
from .utils import hash_napiprojekt, hash_opensubtitles, hash_shooter, hash_thesubdb
from .subtitle import SUBTITLE_EXTENSIONS
from .utils import handle_exception
from .video import VIDEO_EXTENSIONS, Episode, Movie, Video
#: Supported archive extensions
@ -37,12 +36,12 @@ class ProviderPool(object):
:param list providers: name of providers to use, if not all.
:param dict provider_configs: provider configuration as keyword arguments per provider name to pass when
instanciating the :class:`~subliminal.providers.Provider`.
instantiating the :class:`~subliminal.providers.Provider`.
"""
def __init__(self, providers=None, provider_configs=None):
#: Name of providers to use
self.providers = providers or provider_manager.names()
self.providers = providers or default_providers
#: Provider configuration
self.provider_configs = provider_configs or {}
@ -77,10 +76,8 @@ class ProviderPool(object):
try:
logger.info('Terminating provider %s', name)
self.initialized_providers[name].terminate()
except (requests.Timeout, socket.timeout):
logger.error('Provider %r timed out, improperly terminated', name)
except:
logger.exception('Provider %r terminated unexpectedly', name)
except Exception as e:
handle_exception(e, 'Provider {} improperly terminated'.format(name))
del self.initialized_providers[name]
@ -107,7 +104,7 @@ class ProviderPool(object):
return []
# check supported languages
provider_languages = provider_manager[provider].plugin.languages & languages
provider_languages = provider_manager[provider].plugin.check_languages(languages)
if not provider_languages:
logger.info('Skipping provider %r: no language to search for', provider)
return []
@ -116,10 +113,8 @@ class ProviderPool(object):
logger.info('Listing subtitles with provider %r and languages %r', provider, provider_languages)
try:
return self[provider].list_subtitles(video, provider_languages)
except (requests.Timeout, socket.timeout):
logger.error('Provider %r timed out', provider)
except:
logger.exception('Unexpected error in provider %r', provider)
except Exception as e:
handle_exception(e, 'Provider {}'.format(provider))
def list_subtitles(self, video, languages):
"""List subtitles.
@ -169,14 +164,11 @@ class ProviderPool(object):
logger.info('Downloading subtitle %r', subtitle)
try:
self[subtitle.provider_name].download_subtitle(subtitle)
except (requests.Timeout, socket.timeout):
logger.error('Provider %r timed out, discarding it', subtitle.provider_name)
except (BadZipfile, BadRarFile):
logger.error('Bad archive for subtitle %r', subtitle)
except Exception as e:
handle_exception(e, 'Discarding provider {}'.format(subtitle.provider_name))
self.discarded_providers.add(subtitle.provider_name)
return False
except:
logger.exception('Unexpected error in provider %r, discarding it', subtitle.provider_name)
self.discarded_providers.add(subtitle.provider_name)
return False
# check subtitle validity
if not subtitle.is_valid():
@ -338,7 +330,7 @@ def search_external_subtitles(path, directory=None):
subtitles = {}
for p in os.listdir(directory or dirpath):
# keep only valid subtitle filenames
if not p.startswith(fileroot) or not p.endswith(SUBTITLE_EXTENSIONS):
if not p.startswith(fileroot) or not p.lower().endswith(SUBTITLE_EXTENSIONS):
continue
# extract the potential language code
@ -370,7 +362,7 @@ def scan_video(path):
raise ValueError('Path does not exist')
# check video extension
if not path.endswith(VIDEO_EXTENSIONS):
if not path.lower().endswith(VIDEO_EXTENSIONS):
raise ValueError('%r is not a valid video extension' % os.path.splitext(path)[1])
dirpath, filename = os.path.split(path)
@ -379,17 +371,9 @@ def scan_video(path):
# guess
video = Video.fromguess(path, guessit(path))
# size and hashes
# size
video.size = os.path.getsize(path)
if video.size > 10485760:
logger.debug('Size is %d', video.size)
video.hashes['opensubtitles'] = hash_opensubtitles(path)
video.hashes['shooter'] = hash_shooter(path)
video.hashes['thesubdb'] = hash_thesubdb(path)
video.hashes['napiprojekt'] = hash_napiprojekt(path)
logger.debug('Computed hashes %r', video.hashes)
else:
logger.warning('Size is lower than 10MB: hashes not computed')
logger.debug('Size is %d', video.size)
return video
@ -406,37 +390,43 @@ def scan_archive(path):
if not os.path.exists(path):
raise ValueError('Path does not exist')
# check video extension
if not path.endswith(ARCHIVE_EXTENSIONS):
raise ValueError('%r is not a valid archive extension' % os.path.splitext(path)[1])
if not is_rarfile(path):
raise ValueError("'{0}' is not a valid archive".format(os.path.splitext(path)[1]))
dirpath, filename = os.path.split(path)
logger.info('Scanning archive %r in %r', filename, dirpath)
dir_path, filename = os.path.split(path)
# rar extension
if filename.endswith('.rar'):
rar = RarFile(path)
logger.info('Scanning archive %r in %r', filename, dir_path)
# filter on video extensions
rar_filenames = [f for f in rar.namelist() if f.endswith(VIDEO_EXTENSIONS)]
# Get filename and file size from RAR
rar = RarFile(path)
# no video found
if not rar_filenames:
raise ValueError('No video in archive')
# check that the rar doesn't need a password
if rar.needs_password():
raise ValueError('Rar requires a password')
# more than one video found
if len(rar_filenames) > 1:
raise ValueError('More than one video in archive')
# raise an exception if the rar file is broken
# must be called to avoid a potential deadlock with some broken rars
rar.testrar()
# guess
rar_filename = rar_filenames[0]
rar_filepath = os.path.join(dirpath, rar_filename)
video = Video.fromguess(rar_filepath, guessit(rar_filepath))
file_info = [f for f in rar.infolist() if not f.isdir() and f.filename.endswith(VIDEO_EXTENSIONS)]
# size
video.size = rar.getinfo(rar_filename).file_size
else:
raise ValueError('Unsupported extension %r' % os.path.splitext(path)[1])
# sort by file size descending, the largest video in the archive is the one we want, there may be samples or intros
file_info.sort(key=operator.attrgetter('file_size'), reverse=True)
# no video found
if not file_info:
raise ValueError('No video in archive')
# Free the information about irrelevant files before guessing
file_info = file_info[0]
# guess
video_filename = file_info.filename
video_path = os.path.join(dir_path, video_filename)
video = Video.fromguess(video_path, guessit(video_path))
# size
video.size = file_info.file_size
return video
@ -471,17 +461,26 @@ def scan_videos(path, age=None, archives=True):
if dirname.startswith('.'):
logger.debug('Skipping hidden dirname %r in %r', dirname, dirpath)
dirnames.remove(dirname)
# Skip Sample folder
if dirname.lower() == 'sample':
logger.debug('Skipping sample dirname %r in %r', dirname, dirpath)
dirnames.remove(dirname)
# scan for videos
for filename in filenames:
# filter on videos and archives
if not (filename.endswith(VIDEO_EXTENSIONS) or archives and filename.endswith(ARCHIVE_EXTENSIONS)):
if not (filename.lower().endswith(VIDEO_EXTENSIONS) or
archives and filename.lower().endswith(ARCHIVE_EXTENSIONS)):
continue
# skip hidden files
if filename.startswith('.'):
logger.debug('Skipping hidden filename %r in %r', filename, dirpath)
continue
# skip 'sample' media files
if os.path.splitext(filename)[0].lower() == 'sample':
logger.debug('Skipping sample filename %r in %r', filename, dirpath)
continue
# reconstruct the file path
filepath = os.path.join(dirpath, filename)
@ -492,21 +491,27 @@ def scan_videos(path, age=None, archives=True):
continue
# skip old files
if age and datetime.utcnow() - datetime.utcfromtimestamp(os.path.getmtime(filepath)) > age:
logger.debug('Skipping old file %r in %r', filename, dirpath)
try:
file_age = datetime.utcfromtimestamp(os.path.getmtime(filepath))
except ValueError:
logger.warning('Could not get age of file %r in %r', filename, dirpath)
continue
else:
if age and datetime.utcnow() - file_age > age:
logger.debug('Skipping old file %r in %r', filename, dirpath)
continue
# scan
if filename.endswith(VIDEO_EXTENSIONS): # video
if filename.lower().endswith(VIDEO_EXTENSIONS): # video
try:
video = scan_video(filepath)
except ValueError: # pragma: no cover
logger.exception('Error scanning video')
continue
elif archives and filename.endswith(ARCHIVE_EXTENSIONS): # archive
elif archives and filename.lower().endswith(ARCHIVE_EXTENSIONS): # archive
try:
video = scan_archive(filepath)
except (NotRarFile, RarCannotExec, ValueError): # pragma: no cover
except (Error, NotRarFile, RarCannotExec, ValueError): # pragma: no cover
logger.exception('Error scanning archive')
continue
else: # pragma: no cover
@ -517,7 +522,7 @@ def scan_videos(path, age=None, archives=True):
return videos
def refine(video, episode_refiners=None, movie_refiners=None, **kwargs):
def refine(video, episode_refiners=None, movie_refiners=None, refiner_configs=None, **kwargs):
"""Refine a video using :ref:`refiners`.
.. note::
@ -528,6 +533,8 @@ def refine(video, episode_refiners=None, movie_refiners=None, **kwargs):
:type video: :class:`~subliminal.video.Video`
:param tuple episode_refiners: refiners to use for episodes.
:param tuple movie_refiners: refiners to use for movies.
:param dict refiner_configs: refiner configuration as keyword arguments per refiner name to pass when
calling the refine method
:param \*\*kwargs: additional parameters for the :func:`~subliminal.refiners.refine` functions.
"""
@ -536,12 +543,12 @@ def refine(video, episode_refiners=None, movie_refiners=None, **kwargs):
refiners = episode_refiners or ('metadata', 'tvdb', 'omdb')
elif isinstance(video, Movie):
refiners = movie_refiners or ('metadata', 'omdb')
for refiner in refiners:
for refiner in ('hash', ) + refiners:
logger.info('Refining video with %s', refiner)
try:
refiner_manager[refiner].plugin(video, **kwargs)
except:
logger.exception('Failed to refine video')
refiner_manager[refiner].plugin(video, **dict((refiner_configs or {}).get(refiner, {}), **kwargs))
except Exception as e:
handle_exception(e, 'Failed to refine video {0!r}'.format(video.name))
def list_subtitles(videos, languages, pool_class=ProviderPool, **kwargs):
@ -684,7 +691,7 @@ def save_subtitles(video, subtitles, single=False, directory=None, encoding=None
continue
# create subtitle path
subtitle_path = get_subtitle_path(video.name, None if single else subtitle.language)
subtitle_path = subtitle.get_path(video, single=single)
if directory is not None:
subtitle_path = os.path.join(directory, os.path.split(subtitle_path)[1])

View file

@ -19,8 +19,8 @@ class AuthenticationError(ProviderError):
pass
class TooManyRequests(ProviderError):
"""Exception raised by providers when too many requests are made."""
class ServiceUnavailable(ProviderError):
"""Exception raised when status is '503 Service Unavailable'."""
pass

View file

@ -29,9 +29,9 @@ class RegistrableExtensionManager(ExtensionManager):
super(RegistrableExtensionManager, self).__init__(namespace, **kwargs)
def _find_entry_points(self, namespace):
def list_entry_points(self):
# copy of default extensions
eps = list(super(RegistrableExtensionManager, self)._find_entry_points(namespace))
eps = list(super(RegistrableExtensionManager, self).list_entry_points())
# internal extensions
for iep in self.internal_extensions:
@ -89,17 +89,25 @@ class RegistrableExtensionManager(ExtensionManager):
#: Provider manager
provider_manager = RegistrableExtensionManager('subliminal.providers', [
'addic7ed = subliminal.providers.addic7ed:Addic7edProvider',
'argenteam = subliminal.providers.argenteam:ArgenteamProvider',
'legendastv = subliminal.providers.legendastv:LegendasTVProvider',
'opensubtitles = subliminal.providers.opensubtitles:OpenSubtitlesProvider',
'opensubtitlesvip = subliminal.providers.opensubtitles:OpenSubtitlesVipProvider',
'podnapisi = subliminal.providers.podnapisi:PodnapisiProvider',
'shooter = subliminal.providers.shooter:ShooterProvider',
'subscenter = subliminal.providers.subscenter:SubsCenterProvider',
'thesubdb = subliminal.providers.thesubdb:TheSubDBProvider',
'tvsubtitles = subliminal.providers.tvsubtitles:TVsubtitlesProvider'
])
#: Disabled providers
disabled_providers = ['opensubtitlesvip']
#: Default enabled providers
default_providers = [p for p in provider_manager.names() if p not in disabled_providers]
#: Refiner manager
refiner_manager = RegistrableExtensionManager('subliminal.refiners', [
'hash = subliminal.refiners.hash:refine',
'metadata = subliminal.refiners.metadata:refine',
'omdb = subliminal.refiners.omdb:refine',
'tvdb = subliminal.refiners.tvdb:refine'

View file

@ -0,0 +1,229 @@
# -*- coding: utf-8 -*-
from rebulk.loose import ensure_list
from .score import get_equivalent_release_groups, score_keys
from .video import Episode, Movie
from .utils import sanitize, sanitize_release_group
def series_matches(video, title=None, **kwargs):
    """Whether the `video` matches the series title.

    The sanitized `title` is looked up among the sanitized series name and alternative series names of the
    video. A video that is not an episode, or that has no series name, never matches.

    :param video: the video.
    :type video: :class:`~subliminal.video.Video`
    :param str title: the series name.
    :param kwargs: other guess properties, ignored.
    :return: whether there's a match
    :rtype: bool

    """
    if not isinstance(video, Episode) or not video.series:
        # always answer with a real bool, as documented
        return False

    candidates = [video.series] + video.alternative_series
    return sanitize(title) in (sanitize(name) for name in candidates)
def title_matches(video, title=None, episode_title=None, **kwargs):
    """Whether the movie matches the movie `title` or the episode matches the `episode_title`.

    Titles are compared after sanitization. A video that is neither an episode nor a movie, or that has no
    title, never matches.

    :param video: the video.
    :type video: :class:`~subliminal.video.Video`
    :param str title: the movie title.
    :param str episode_title: the series episode title.
    :param kwargs: other guess properties, ignored.
    :return: whether there's a match
    :rtype: bool

    """
    # pick the guessed title that is relevant for this kind of video
    if isinstance(video, Episode):
        expected = episode_title
    elif isinstance(video, Movie):
        expected = title
    else:
        return False

    if not video.title:
        return False

    return sanitize(expected) == sanitize(video.title)
def season_matches(video, season=None, **kwargs):
    """Whether the episode matches the `season`.

    A video that is not an episode, or that has no season, never matches.

    :param video: the video.
    :type video: :class:`~subliminal.video.Video`
    :param int season: the episode season.
    :param kwargs: other guess properties, ignored.
    :return: whether there's a match
    :rtype: bool

    """
    if not isinstance(video, Episode):
        return False

    # coerce so that a falsy season yields False rather than the falsy value itself
    return bool(video.season and season == video.season)
def episode_matches(video, episode=None, **kwargs):
    """Whether the episode matches the `episode`.

    The guessed `episode` is normalized to a list with ``ensure_list`` and compared to the episode list of
    the video. A video that is not an episode, or that has no episodes, never matches.

    :param video: the video.
    :type video: :class:`~subliminal.video.Video`
    :param episode: the episode number(s).
    :type episode: list of int or int
    :param kwargs: other guess properties, ignored.
    :return: whether there's a match
    :rtype: bool

    """
    if not isinstance(video, Episode):
        return False

    # coerce so that an empty episode list yields False rather than the list itself
    return bool(video.episodes and ensure_list(episode) == video.episodes)
def year_matches(video, year=None, partial=False, **kwargs):
    """Whether the video matches the `year`.

    For episodes, a guess without any year also matches when the guess is not `partial` and the video is
    flagged as the original series.

    :param video: the video.
    :type video: :class:`~subliminal.video.Video`
    :param int year: the video year.
    :param bool partial: whether or not the guess is partial.
    :param kwargs: other guess properties, ignored.
    :return: whether there's a match
    :rtype: bool

    """
    if video.year and year == video.year:
        return True

    if isinstance(video, Episode):
        # count "no year" as an information
        return bool(not partial and video.original_series and not year)

    return False
def country_matches(video, country=None, partial=False, **kwargs):
    """Whether the video matches the `country`.

    For episodes, a guess without any country also matches when the guess is not `partial` and the video is
    flagged as the original series. For movies, a guess without any country matches a movie without country.

    :param video: the video.
    :type video: :class:`~subliminal.video.Video`
    :param country: the video country.
    :type country: :class:`~babelfish.country.Country`
    :param bool partial: whether or not the guess is partial.
    :param kwargs: other guess properties, ignored.
    :return: whether there's a match
    :rtype: bool

    """
    if video.country and country == video.country:
        return True

    if isinstance(video, Episode):
        # count "no country" as an information
        return bool(not partial and video.original_series and not country)

    if isinstance(video, Movie):
        # count "no country" as an information
        return not video.country and not country

    return False
def release_group_matches(video, release_group=None, **kwargs):
    """Whether the video matches the `release_group`.

    There is a match when any release group equivalent to the sanitized release group of the video is
    contained in the sanitized `release_group`. A missing release group on either side never matches.

    :param video: the video.
    :type video: :class:`~subliminal.video.Video`
    :param str release_group: the video release group.
    :param kwargs: other guess properties, ignored.
    :return: whether there's a match
    :rtype: bool

    """
    if not video.release_group or not release_group:
        return False

    equivalents = get_equivalent_release_groups(sanitize_release_group(video.release_group))
    # sanitize the guessed release group once instead of once per equivalent
    guessed = sanitize_release_group(release_group)
    return any(r in guessed for r in equivalents)
def streaming_service_matches(video, streaming_service=None, **kwargs):
    """Whether the video matches the `streaming_service`.

    A video without streaming service never matches.

    :param video: the video.
    :type video: :class:`~subliminal.video.Video`
    :param str streaming_service: the video streaming service
    :param kwargs: other guess properties, ignored.
    :return: whether there's a match
    :rtype: bool

    """
    # coerce so that a missing streaming service yields False rather than None
    return bool(video.streaming_service and streaming_service == video.streaming_service)
def resolution_matches(video, screen_size=None, **kwargs):
    """Whether the video resolution matches the `screen_size`.

    A video without resolution never matches.

    :param video: the video.
    :type video: :class:`~subliminal.video.Video`
    :param str screen_size: the video resolution
    :param kwargs: other guess properties, ignored.
    :return: whether there's a match
    :rtype: bool

    """
    # coerce so that a missing resolution yields False rather than None
    return bool(video.resolution and screen_size == video.resolution)
def source_matches(video, source=None, **kwargs):
    """Whether the video matches the `source`.

    A video without source never matches.

    :param video: the video.
    :type video: :class:`~subliminal.video.Video`
    :param str source: the video source
    :param kwargs: other guess properties, ignored.
    :return: whether there's a match
    :rtype: bool

    """
    # coerce so that a missing source yields False rather than None
    return bool(video.source and source == video.source)
def video_codec_matches(video, video_codec=None, **kwargs):
    """Whether the video matches the `video_codec`.

    A video without video codec never matches.

    :param video: the video.
    :type video: :class:`~subliminal.video.Video`
    :param str video_codec: the video codec
    :param kwargs: other guess properties, ignored.
    :return: whether there's a match
    :rtype: bool

    """
    # coerce so that a missing video codec yields False rather than None
    return bool(video.video_codec and video_codec == video.video_codec)
def audio_codec_matches(video, audio_codec=None, **kwargs):
    """Whether the video matches the `audio_codec`.

    A video without audio codec never matches.

    :param video: the video.
    :type video: :class:`~subliminal.video.Video`
    :param str audio_codec: the video audio codec
    :param kwargs: other guess properties, ignored.
    :return: whether there's a match
    :rtype: bool

    """
    # coerce so that a missing audio codec yields False rather than None
    return bool(video.audio_codec and audio_codec == video.audio_codec)
#: Available matches functions, keyed by the match name they can contribute.
#: :func:`guess_matches` calls each one as ``func(video, partial=partial, **guess)``, so every function
#: picks the guess properties it needs by keyword and ignores the others.
matches_manager = {
    # compares the guessed ``title`` with the series names of an episode
    'series': series_matches,
    # compares ``episode_title`` for episodes and ``title`` for movies
    'title': title_matches,
    'season': season_matches,
    'episode': episode_matches,
    'year': year_matches,
    'country': country_matches,
    'release_group': release_group_matches,
    'streaming_service': streaming_service_matches,
    # compares the guessed ``screen_size`` with the video resolution
    'resolution': resolution_matches,
    'source': source_matches,
    'video_codec': video_codec_matches,
    'audio_codec': audio_codec_matches
}
def guess_matches(video, guess, partial=False):
    """Get matches between a `video` and a `guess`.

    Every key of ``score_keys`` that has a function in ``matches_manager`` is checked by calling that
    function with the `video`, the `partial` flag and the `guess` properties as keyword arguments.

    If a guess is `partial`, the absence of information won't be counted as a match.

    :param video: the video.
    :type video: :class:`~subliminal.video.Video`
    :param guess: the guess.
    :type guess: dict
    :param bool partial: whether or not the guess is partial.
    :return: matches between the `video` and the `guess`.
    :rtype: set

    """
    return {
        name for name in score_keys
        if name in matches_manager and matches_manager[name](video, partial=partial, **guess)
    }

View file

@ -4,6 +4,7 @@ import logging
from bs4 import BeautifulSoup, FeatureNotFound
from six.moves.xmlrpc_client import SafeTransport
from .. import __short_version__
from ..video import Episode, Movie
logger = logging.getLogger(__name__)
@ -68,6 +69,12 @@ class Provider(object):
#: Required hash, if any
required_hash = None
#: Subtitle class to use
subtitle_class = None
#: User Agent to use
user_agent = 'Subliminal/%s' % __short_version__
def __enter__(self):
self.initialize()
return self
@ -111,13 +118,41 @@ class Provider(object):
:rtype: bool
"""
if not isinstance(video, cls.video_types):
if not cls.check_types(video):
return False
if cls.required_hash is not None and cls.required_hash not in video.hashes:
return False
return True
@classmethod
def check_types(cls, video):
    """Tell whether the provider supports the type of the `video`.

    The type is supported when the `video` is an instance of :attr:`video_types`.

    :param video: the video to check.
    :type video: :class:`~subliminal.video.Video`
    :return: `True` if the `video` is valid, `False` otherwise.
    :rtype: bool

    """
    supported_types = cls.video_types
    return isinstance(video, supported_types)
@classmethod
def check_languages(cls, languages):
    """Restrict the `languages` to the ones supported by the provider.

    The result is the intersection of :attr:`languages` of the provider with the given `languages`.

    :param languages: the languages to check.
    :type languages: set of :class:`~babelfish.language.Language`
    :return: subset of the supported languages.
    :rtype: set of :class:`~babelfish.language.Language`

    """
    supported = cls.languages
    return supported & languages
def query(self, *args, **kwargs):
"""Query the provider for subtitles.

View file

@ -7,20 +7,22 @@ from guessit import guessit
from requests import Session
from . import ParserBeautifulSoup, Provider
from .. import __short_version__
from ..cache import SHOW_EXPIRATION_TIME, region
from ..exceptions import AuthenticationError, ConfigurationError, DownloadLimitExceeded, TooManyRequests
from ..score import get_equivalent_release_groups
from ..subtitle import Subtitle, fix_line_ending, guess_matches
from ..utils import sanitize, sanitize_release_group
from ..exceptions import AuthenticationError, ConfigurationError, DownloadLimitExceeded
from ..matches import guess_matches
from ..subtitle import Subtitle, fix_line_ending
from ..utils import sanitize
from ..video import Episode
logger = logging.getLogger(__name__)
language_converters.register('addic7ed = subliminal.converters.addic7ed:Addic7edConverter')
# Series cell matching regex
show_cells_re = re.compile(b'<td class="version">.*?</td>', re.DOTALL)
#: Series header parsing regex
series_year_re = re.compile(r'^(?P<series>[ \w\'.:(),&!?-]+?)(?: \((?P<year>\d{4})\))?$')
series_year_re = re.compile(r'^(?P<series>[ \w\'.:(),*&!?-]+?)(?: \((?P<year>\d{4})\))?$')
class Addic7edSubtitle(Subtitle):
@ -29,7 +31,7 @@ class Addic7edSubtitle(Subtitle):
def __init__(self, language, hearing_impaired, page_link, series, season, episode, title, year, version,
download_link):
super(Addic7edSubtitle, self).__init__(language, hearing_impaired, page_link)
super(Addic7edSubtitle, self).__init__(language, hearing_impaired=hearing_impaired, page_link=page_link)
self.series = series
self.season = season
self.episode = episode
@ -42,37 +44,31 @@ class Addic7edSubtitle(Subtitle):
def id(self):
return self.download_link
def get_matches(self, video):
matches = set()
@property
def info(self):
    """Human readable description built from the series, year, season, episode, title and version.

    The year is wrapped in parentheses and the title and version are separated by dashes; each of these
    decorations is only added when the corresponding value is set.
    """
    template = '{series}{yopen}{year}{yclose} s{season:02d}e{episode:02d}{topen}{title}{tclose}{version}'

    # decorations only show up next to values that are actually set
    if self.year:
        year, year_open, year_close = self.year, ' (', ')'
    else:
        year, year_open, year_close = '', '', ''
    title_open = ' - ' if self.title else ''
    title_close = ' - ' if self.version else ''

    return template.format(series=self.series, yopen=year_open, year=year, yclose=year_close,
                           season=self.season, episode=self.episode, topen=title_open, title=self.title,
                           tclose=title_close, version=self.version)
def get_matches(self, video):
# series name
matches = guess_matches(video, {
'title': self.series,
'season': self.season,
'episode': self.episode,
'episode_title': self.title,
'year': self.year,
'release_group': self.version,
})
# series
if video.series and sanitize(self.series) == sanitize(video.series):
matches.add('series')
# season
if video.season and self.season == video.season:
matches.add('season')
# episode
if video.episode and self.episode == video.episode:
matches.add('episode')
# title
if video.title and sanitize(self.title) == sanitize(video.title):
matches.add('title')
# year
if video.original_series and self.year is None or video.year and video.year == self.year:
matches.add('year')
# release_group
if (video.release_group and self.version and
any(r in sanitize_release_group(self.version)
for r in get_equivalent_release_groups(sanitize_release_group(video.release_group)))):
matches.add('release_group')
# resolution
if video.resolution and self.version and video.resolution in self.version.lower():
matches.add('resolution')
# format
if video.format and self.version and video.format.lower() in self.version.lower():
matches.add('format')
# other properties
matches |= guess_matches(video, guessit(self.version), partial=True)
if self.version:
matches |= guess_matches(video, guessit(self.version, {'type': 'episode'}), partial=True)
return matches
@ -86,21 +82,23 @@ class Addic7edProvider(Provider):
]}
video_types = (Episode,)
server_url = 'http://www.addic7ed.com/'
subtitle_class = Addic7edSubtitle
def __init__(self, username=None, password=None):
if username is not None and password is None or username is None and password is not None:
if any((username, password)) and not all((username, password)):
raise ConfigurationError('Username and password must be specified')
self.username = username
self.password = password
self.logged_in = False
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__
self.session.headers['User-Agent'] = self.user_agent
# login
if self.username is not None and self.password is not None:
if self.username and self.password:
logger.info('Logging in')
data = {'username': self.username, 'password': self.password, 'Submit': 'Log in'}
r = self.session.post(self.server_url + 'dologin.php', data, allow_redirects=False, timeout=10)
@ -134,7 +132,16 @@ class Addic7edProvider(Provider):
logger.info('Getting show ids')
r = self.session.get(self.server_url + 'shows.php', timeout=10)
r.raise_for_status()
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
# LXML parser seems to fail when parsing Addic7ed.com HTML markup.
# Last known version to work properly is 3.6.4 (next version, 3.7.0, fails)
# Assuming the site's markup is bad, and stripping it down to only contain what's needed.
show_cells = re.findall(show_cells_re, r.content)
if show_cells:
soup = ParserBeautifulSoup(b''.join(show_cells), ['lxml', 'html.parser'])
else:
# If RegEx fails, fall back to original r.content and use 'html.parser'
soup = ParserBeautifulSoup(r.content, ['html.parser'])
# populate the show ids
show_ids = {}
@ -164,10 +171,8 @@ class Addic7edProvider(Provider):
# make the search
logger.info('Searching show ids with %r', params)
r = self.session.get(self.server_url + 'search.php', params=params, timeout=10)
r = self.session.get(self.server_url + 'srch.php', params=params, timeout=10)
r.raise_for_status()
if r.status_code == 304:
raise TooManyRequests()
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
# get the suggestion
@ -218,24 +223,23 @@ class Addic7edProvider(Provider):
# search as last resort
if not show_id:
logger.warning('Series not found in show ids')
logger.warning('Series %s not found in show ids', series)
show_id = self._search_show_id(series)
return show_id
def query(self, series, season, year=None, country=None):
# get the show id
show_id = self.get_show_id(series, year, country)
if show_id is None:
logger.error('No show id found for %r (%r)', series, {'year': year, 'country': country})
return []
def query(self, show_id, series, season, year=None, country=None):
# get the page of the season of the show
logger.info('Getting the page of show id %d, season %d', show_id, season)
r = self.session.get(self.server_url + 'show/%d' % show_id, params={'season': season}, timeout=10)
r = self.session.get(self.server_url + 'show/%d' % show_id, params={'season': season}, timeout=10)
r.raise_for_status()
if r.status_code == 304:
raise TooManyRequests()
if not r.content:
# Provider returns a status of 304 Not Modified with an empty content
# raise_for_status won't raise exception for that status code
logger.debug('No data returned from provider')
return []
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
# loop over subtitle rows
@ -262,16 +266,32 @@ class Addic7edProvider(Provider):
version = cells[4].text
download_link = cells[9].a['href'][1:]
subtitle = Addic7edSubtitle(language, hearing_impaired, page_link, series, season, episode, title, year,
version, download_link)
subtitle = self.subtitle_class(language, hearing_impaired, page_link, series, season, episode, title, year,
version, download_link)
logger.debug('Found subtitle %r', subtitle)
subtitles.append(subtitle)
return subtitles
def list_subtitles(self, video, languages):
# List the subtitles of `video` whose language is in `languages` and whose episode number
# equals video.episode.
# NOTE(review): the return statement right below calls query() with the series name first,
# while the code after it calls query() with a show id first. Presumably the first return is
# the removed body from a rendered diff - confirm against the real file.
return [s for s in self.query(video.series, video.season, video.year)
if s.language in languages and s.episode == video.episode]
# lookup show_id
# (try the main series name first, then each alternative name, stopping at the first hit)
titles = [video.series] + video.alternative_series
show_id = None
for title in titles:
show_id = self.get_show_id(title, video.year)
if show_id is not None:
break
# query for subtitles with the show_id
# (`title` is still bound to the name that produced the show id)
if show_id is not None:
subtitles = [s for s in self.query(show_id, title, video.season, video.year)
if s.language in languages and s.episode == video.episode]
if subtitles:
return subtitles
else:
logger.error('No show id found for %r (%r)', video.series, {'year': video.year})
# nothing found, or no show id
return []
def download_subtitle(self, subtitle):
# download the subtitle
@ -280,6 +300,12 @@ class Addic7edProvider(Provider):
timeout=10)
r.raise_for_status()
if not r.content:
# Provider returns a status of 304 Not Modified with an empty content
# raise_for_status won't raise exception for that status code
logger.debug('Unable to download subtitle. No data returned from provider')
return
# detect download limit exceeded
if r.headers['Content-Type'] == 'text/html':
raise DownloadLimitExceeded

View file

@ -0,0 +1,135 @@
# -*- coding: utf-8 -*-
import io
import json
import logging
from zipfile import ZipFile
from babelfish import Language
from guessit import guessit
from requests import Session
from six.moves import urllib
from . import Provider
from ..cache import EPISODE_EXPIRATION_TIME, region
from ..exceptions import ProviderError
from ..matches import guess_matches
from ..subtitle import Subtitle, fix_line_ending
from ..video import Episode
logger = logging.getLogger(__name__)
class ArgenteamSubtitle(Subtitle):
    """Subtitle listed by the argenteam provider.

    :param language: language of the subtitle.
    :param str download_link: URL of the zipped subtitle; doubles as page link and identifier.
    :param str series: series name the search was made with.
    :param int season: season number.
    :param int episode: episode number.
    :param release: stored unchanged on the instance.
    :param version: release description matched against the video; may be empty.

    """
    provider_name = 'argenteam'

    def __init__(self, language, download_link, series, season, episode, release, version):
        # Pass the link by keyword: the other providers of this package call the base initializer as
        # (language, hearing_impaired=..., page_link=..., encoding=...), so a second positional
        # argument would be taken as ``hearing_impaired`` instead of the page link.
        super(ArgenteamSubtitle, self).__init__(language, page_link=download_link)
        self.download_link = download_link
        self.series = series
        self.season = season
        self.episode = episode
        self.release = release
        self.version = version

    @property
    def id(self):
        """The download link, used as the subtitle identifier."""
        return self.download_link

    @property
    def info(self):
        """Last path segment of the download link, URL-unquoted."""
        return urllib.parse.unquote(self.download_link.rsplit('/', 1)[-1])

    def get_matches(self, video):
        """Compute the match names between this subtitle and `video`.

        :param video: the video to compare against.
        :return: the names of the matching properties.
        :rtype: set

        """
        matches = guess_matches(video, {
            'title': self.series,
            'season': self.season,
            'episode': self.episode,
            'release_group': self.version
        })

        # resolution
        if video.resolution and self.version and video.resolution in self.version.lower():
            matches.add('resolution')

        # only guess from the version when there is one: guessit needs an actual string
        if self.version:
            matches |= guess_matches(video, guessit(self.version, {'type': 'episode'}), partial=True)

        return matches
class ArgenteamProvider(Provider):
    """Argenteam provider: Spanish subtitles for episodes.

    An episode id is looked up through the ``search`` endpoint, then one subtitle is listed for each
    subtitle entry of each release returned by the ``episode`` endpoint.

    """
    provider_name = 'argenteam'
    language = Language.fromalpha2('es')
    languages = {language}
    video_types = (Episode,)
    server_url = "http://argenteam.net/api/v1/"
    subtitle_class = ArgenteamSubtitle

    def __init__(self):
        # the session is created in initialize()
        self.session = None

    def initialize(self):
        """Create the HTTP session and set its User-Agent header."""
        self.session = Session()
        self.session.headers['User-Agent'] = self.user_agent

    def terminate(self):
        """Close the HTTP session."""
        self.session.close()

    @region.cache_on_arguments(expiration_time=EPISODE_EXPIRATION_TIME, should_cache_fn=lambda value: value)
    def search_episode_id(self, series, season, episode):
        """Search the episode id from the `series`, `season` and `episode`.

        Only a truthy result is cached (see ``should_cache_fn``), so a failed lookup is retried next time.

        :param str series: series of the episode.
        :param int season: season of the episode.
        :param int episode: episode number.
        :return: the episode id, if any.
        :rtype: int or None

        """
        # make the search
        query = '%s S%#02dE%#02d' % (series, season, episode)
        logger.info('Searching episode id for %r', query)
        r = self.session.get(self.server_url + 'search', params={'q': query}, timeout=10)
        r.raise_for_status()
        results = json.loads(r.text)

        # accept the answer only when the search is unambiguous
        if results['total'] == 1:
            return results['results'][0]['id']

        logger.error('No episode id found for %r', series)
        return None

    def query(self, series, season, episode):
        """List the subtitles of every release of an episode.

        :param str series: series of the episode.
        :param int season: season of the episode.
        :param int episode: episode number.
        :return: the subtitles found, empty when no episode id could be determined.
        :rtype: list

        """
        episode_id = self.search_episode_id(series, season, episode)
        if episode_id is None:
            return []

        response = self.session.get(self.server_url + 'episode', params={'id': episode_id}, timeout=10)
        response.raise_for_status()
        content = json.loads(response.text)

        subtitles = []
        for r in content['releases']:
            for s in r['subtitles']:
                subtitle = self.subtitle_class(self.language, s['uri'], series, season, episode, r['team'], r['tags'])
                logger.debug('Found subtitle %r', subtitle)
                subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        """Return the subtitles of the first series name that yields any.

        The main series name is tried first, then each alternative name. `languages` is not used
        to filter the results here.

        """
        titles = [video.series] + video.alternative_series
        for title in titles:
            subs = self.query(title, video.season, video.episode)
            if subs:
                return subs

        return []

    def download_subtitle(self, subtitle):
        """Download the zip behind `subtitle.download_link` and store its single file as content.

        :raise: :class:`ProviderError` if the archive does not contain exactly one file.

        """
        # download as a zip
        logger.info('Downloading subtitle %r', subtitle)
        r = self.session.get(subtitle.download_link, timeout=10)
        r.raise_for_status()

        # open the zip
        with ZipFile(io.BytesIO(r.content)) as zf:
            names = zf.namelist()
            # an empty archive is reported like any other malformed one instead of an IndexError
            if not names:
                raise ProviderError('No file to unzip')
            if len(names) > 1:
                raise ProviderError('More than one file to unzip')
            subtitle.content = fix_line_ending(zf.read(names[0]))

View file

@ -12,14 +12,16 @@ from guessit import guessit
import pytz
import rarfile
from rarfile import RarFile, is_rarfile
from rebulk.loose import ensure_list
from requests import Session
from zipfile import ZipFile, is_zipfile
from . import ParserBeautifulSoup, Provider
from .. import __short_version__
from ..cache import SHOW_EXPIRATION_TIME, region
from ..exceptions import AuthenticationError, ConfigurationError, ProviderError
from ..subtitle import SUBTITLE_EXTENSIONS, Subtitle, fix_line_ending, guess_matches, sanitize
from ..exceptions import AuthenticationError, ConfigurationError, ProviderError, ServiceUnavailable
from ..matches import guess_matches
from ..subtitle import SUBTITLE_EXTENSIONS, Subtitle, fix_line_ending
from ..utils import sanitize
from ..video import Episode, Movie
logger = logging.getLogger(__name__)
@ -44,8 +46,11 @@ rating_re = re.compile(r'nota (?P<rating>\d+)')
#: Timestamp parsing regex
timestamp_re = re.compile(r'(?P<day>\d+)/(?P<month>\d+)/(?P<year>\d+) - (?P<hour>\d+):(?P<minute>\d+)')
#: Title with year/country regex
title_re = re.compile(r'^(?P<series>.*?)(?: \((?:(?P<year>\d{4})|(?P<country>[A-Z]{2}))\))?$')
#: Cache key for releases
releases_key = __name__ + ':releases|{archive_id}'
releases_key = __name__ + ':releases|{archive_id}|{archive_name}'
class LegendasTVArchive(object):
@ -60,8 +65,8 @@ class LegendasTVArchive(object):
:param int rating: rating (0-10).
:param timestamp: timestamp.
:type timestamp: datetime.datetime
"""
def __init__(self, id, name, pack, featured, link, downloads=0, rating=0, timestamp=None):
#: Identifier
self.id = id
@ -96,10 +101,11 @@ class LegendasTVArchive(object):
class LegendasTVSubtitle(Subtitle):
"""LegendasTV Subtitle."""
provider_name = 'legendastv'
def __init__(self, language, type, title, year, imdb_id, season, archive, name):
super(LegendasTVSubtitle, self).__init__(language, archive.link)
super(LegendasTVSubtitle, self).__init__(language, page_link=archive.link)
self.type = type
self.title = title
self.year = year
@ -112,40 +118,28 @@ class LegendasTVSubtitle(Subtitle):
def id(self):
return '%s-%s' % (self.archive.id, self.name.lower())
@property
def info(self):
return self.name
def get_matches(self, video, hearing_impaired=False):
matches = set()
matches = guess_matches(video, {
'title': self.title,
'year': self.year
})
# episode
if isinstance(video, Episode) and self.type == 'episode':
# series
if video.series and sanitize(self.title) == sanitize(video.series):
matches.add('series')
# year (year is based on season air date hence the adjustment)
if video.original_series and self.year is None or video.year and video.year == self.year - self.season + 1:
matches.add('year')
# imdb_id
if video.series_imdb_id and self.imdb_id == video.series_imdb_id:
matches.add('series_imdb_id')
# movie
elif isinstance(video, Movie) and self.type == 'movie':
# title
if video.title and sanitize(self.title) == sanitize(video.title):
matches.add('title')
# year
if video.year and self.year == video.year:
matches.add('year')
# imdb_id
if video.imdb_id and self.imdb_id == video.imdb_id:
matches.add('imdb_id')
# archive name
matches |= guess_matches(video, guessit(self.archive.name, {'type': self.type}))
# name
matches |= guess_matches(video, guessit(self.name, {'type': self.type}))
@ -157,29 +151,38 @@ class LegendasTVProvider(Provider):
:param str username: username.
:param str password: password.
"""
languages = {Language.fromlegendastv(l) for l in language_converters['legendastv'].codes}
server_url = 'http://legendas.tv/'
subtitle_class = LegendasTVSubtitle
def __init__(self, username=None, password=None):
# Check that the UNRAR tool is usable, validate the optional credentials and reset the
# login/session state. Raises ConfigurationError on either failure.
# NOTE(review): the truthiness check on the next line is repeated in any/all form further down,
# with the UNRAR check in between. Presumably the next line is the removed line of a rendered
# diff - confirm against the real file.
if username and not password or not username and password:
# Provider needs UNRAR installed. If not available raise ConfigurationError
try:
rarfile.custom_check([rarfile.UNRAR_TOOL], True)
except rarfile.RarExecError:
raise ConfigurationError('UNRAR tool not available')
# either both credentials or none
if any((username, password)) and not all((username, password)):
raise ConfigurationError('Username and password must be specified')
self.username = username
self.password = password
# no login has been performed yet
self.logged_in = False
# no session exists until one is assigned elsewhere
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__
self.session.headers['User-Agent'] = self.user_agent
# login
if self.username is not None and self.password is not None:
if self.username and self.password:
logger.info('Logging in')
data = {'_method': 'POST', 'data[User][username]': self.username, 'data[User][password]': self.password}
r = self.session.post(self.server_url + 'login', data, allow_redirects=False, timeout=10)
r.raise_for_status()
raise_for_status(r)
soup = ParserBeautifulSoup(r.content, ['html.parser'])
if soup.find('div', {'class': 'alert-error'}, string=re.compile(u'Usuário ou senha inválidos')):
@ -193,94 +196,174 @@ class LegendasTVProvider(Provider):
if self.logged_in:
logger.info('Logging out')
r = self.session.get(self.server_url + 'users/logout', allow_redirects=False, timeout=10)
r.raise_for_status()
raise_for_status(r)
logger.debug('Logged out')
self.logged_in = False
self.session.close()
@region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
def search_titles(self, title):
@staticmethod
def is_valid_title(title, title_id, sanitized_title, season, year):
    """Tell whether a search result is the title being looked for.

    :param dict title: the result, with ``title`` and ``type`` keys and optional ``season``/``year`` keys.
    :param int title_id: identifier of the result, only used in log messages.
    :param str sanitized_title: sanitized form of the wanted title.
    :param season: wanted season; a truthy value means an episode is wanted, otherwise a movie.
    :param year: wanted year, only checked for movies; ``None`` disables the check.
    :return: ``True`` when the result is accepted, ``None`` when it is discarded.

    """
    candidate = sanitize(title['title'])

    # the names must be identical once sanitized
    if candidate != sanitized_title:
        logger.debug("Mismatched title, discarding title %d (%s)",
                     title_id, candidate)
        return None

    if not season:
        # movie wanted: the result has to be a movie...
        if title['type'] != 'movie':
            logger.debug("Mismatched 'movie' type, discarding title %d (%s)", title_id, candidate)
            return None

        # ...and its year must agree whenever both years are known
        both_years_known = year is not None and 'year' in title
        if both_years_known and title['year'] != year:
            logger.debug("Mismatched movie year, discarding title %d (%s)", title_id, candidate)
            return None

        return True

    # episode wanted: the result has to be an episode...
    if title['type'] != 'episode':
        logger.debug("Mismatched 'episode' type, discarding title %d (%s)", title_id, candidate)
        return None

    # ...of exactly the wanted season
    same_season = 'season' in title and title['season'] == season
    if not same_season:
        logger.debug('Mismatched season %s, discarding title %d (%s)',
                     title.get('season'), title_id, candidate)
        return None

    return True
# Results are cached for SHOW_EXPIRATION_TIME, and only when the returned value is truthy.
@region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME, should_cache_fn=lambda value: value)
def search_titles(self, title, season, title_year):
"""Search for titles matching the `title`.
For episodes, each season has it own title
:param str title: the title to search for.
:param int season: season of the title
:param int title_year: year of the title
:return: found titles.
:rtype: dict
"""
# NOTE(review): from here on this span reads like a rendered diff with removed and added lines
# interleaved: the suggestion endpoint is requested twice, `titles[title_id] = title` is assigned
# both unconditionally and behind is_valid_title(), and several extraction steps appear twice.
# Presumably one copy of each pair is the superseded code; confirm against the real file before
# relying on the statement order below.
# make the query
logger.info('Searching title %r', title)
r = self.session.get(self.server_url + 'legenda/sugestao/{}'.format(title), timeout=10)
r.raise_for_status()
results = json.loads(r.text)
# loop over results
titles = {}
for result in results:
source = result['_source']
# the title is searched as sanitized, plus a second form that ignores quote and dot characters
sanitized_titles = [sanitize(title)]
ignore_characters = {'\'', '.'}
if any(c in title for c in ignore_characters):
sanitized_titles.append(sanitize(title, ignore_characters=ignore_characters))
# extract id
title_id = int(source['id_filme'])
for sanitized_title in sanitized_titles:
# make the query
if season:
logger.info('Searching episode title %r for season %r', sanitized_title, season)
else:
logger.info('Searching movie title %r', sanitized_title)
# extract type and title
title = {'type': type_map[source['tipo']], 'title': source['dsc_nome']}
r = self.session.get(self.server_url + 'legenda/sugestao/{}'.format(sanitized_title), timeout=10)
raise_for_status(r)
results = json.loads(r.text)
# extract year
if source['dsc_data_lancamento'] and source['dsc_data_lancamento'].isdigit():
title['year'] = int(source['dsc_data_lancamento'])
# loop over results
for result in results:
source = result['_source']
# extract imdb_id
if source['id_imdb'] != '0':
if not source['id_imdb'].startswith('tt'):
title['imdb_id'] = 'tt' + source['id_imdb'].zfill(7)
else:
title['imdb_id'] = source['id_imdb']
# extract id
title_id = int(source['id_filme'])
# extract season
if title['type'] == 'episode':
if source['temporada'] and source['temporada'].isdigit():
title['season'] = int(source['temporada'])
else:
match = season_re.search(source['dsc_nome_br'])
if match:
title['season'] = int(match.group('season'))
# extract type
title = {'type': type_map[source['tipo']]}
# extract title, year and country
name, year, country = title_re.match(source['dsc_nome']).groups()
title['title'] = name
# extract imdb_id
# (ids that lack the 'tt' prefix get it added, zero-padded to 7 digits)
if source['id_imdb'] != '0':
if not source['id_imdb'].startswith('tt'):
title['imdb_id'] = 'tt' + source['id_imdb'].zfill(7)
else:
logger.warning('No season detected for title %d', title_id)
title['imdb_id'] = source['id_imdb']
# add title
titles[title_id] = title
# extract season
# (taken from the 'temporada' field when numeric, otherwise searched in 'dsc_nome_br')
if title['type'] == 'episode':
if source['temporada'] and source['temporada'].isdigit():
title['season'] = int(source['temporada'])
else:
match = season_re.search(source['dsc_nome_br'])
if match:
title['season'] = int(match.group('season'))
else:
logger.debug('No season detected for title %d (%s)', title_id, name)
logger.debug('Found %d titles', len(titles))
# extract year
if year:
title['year'] = int(year)
elif source['dsc_data_lancamento'] and source['dsc_data_lancamento'].isdigit():
# year is based on season air date hence the adjustment
title['year'] = int(source['dsc_data_lancamento']) - title.get('season', 1) + 1
# add title only if is valid
# Check against title without ignored chars
if self.is_valid_title(title, title_id, sanitized_titles[0], season, title_year):
titles[title_id] = title
logger.debug('Found %d titles', len(titles))
return titles
@region.cache_on_arguments(expiration_time=timedelta(minutes=15).total_seconds())
def get_archives(self, title_id, language_code):
"""Get the archive list from a given `title_id` and `language_code`.
def get_archives(self, title_id, language_code, title_type, season, episodes):
"""Get the archive list from a given `title_id`, `language_code`, `title_type`, `season` and `episode`.
:param int title_id: title id.
:param int language_code: language code.
:param str title_type: episode or movie
:param int season: season
:param list episodes: episodes
:return: the archives.
:rtype: list of :class:`LegendasTVArchive`
"""
logger.info('Getting archives for title %d and language %d', title_id, language_code)
archives = []
page = 1
page = 0
while True:
# get the archive page
url = self.server_url + 'util/carrega_legendas_busca_filme/{title}/{language}/-/{page}'.format(
title=title_id, language=language_code, page=page)
url = self.server_url + 'legenda/busca/-/{language}/-/{page}/{title}'.format(
language=language_code, page=page, title=title_id)
r = self.session.get(url)
r.raise_for_status()
raise_for_status(r)
# parse the results
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
for archive_soup in soup.select('div.list_element > article > div'):
for archive_soup in soup.select('div.list_element > article > div > div.f_left'):
# create archive
archive = LegendasTVArchive(archive_soup.a['href'].split('/')[2], archive_soup.a.text,
'pack' in archive_soup['class'], 'destaque' in archive_soup['class'],
archive = LegendasTVArchive(archive_soup.a['href'].split('/')[2],
archive_soup.a.text,
'pack' in archive_soup.parent['class'],
'destaque' in archive_soup.parent['class'],
self.server_url + archive_soup.a['href'][1:])
# clean name of path separators and pack flags
clean_name = archive.name.replace('/', '-')
if archive.pack and clean_name.startswith('(p)'):
clean_name = clean_name[3:]
# guess from name
guess = guessit(clean_name, {'type': title_type})
# episode
if season and episodes:
# discard mismatches on episode in non-pack archives
# Guessit may return int for single episode or list for multi-episode
# Check if archive name has multiple episodes releases on it
if not archive.pack and 'episode' in guess:
wanted_episode = set(episodes)
archive_episode = set(ensure_list(guess['episode']))
if not wanted_episode.intersection(archive_episode):
logger.debug('Mismatched episode %s, discarding archive: %s', guess['episode'], clean_name)
continue
# extract text containing downloads, rating and timestamp
data_text = archive_soup.find('p', class_='data').text
@ -300,6 +383,8 @@ class LegendasTVProvider(Provider):
raise ProviderError('Archive timestamp is in the future')
# add archive
logger.info('Found archive for title %d and language %d at page %s: %s',
title_id, language_code, page, archive)
archives.append(archive)
# stop on last page
@ -322,7 +407,7 @@ class LegendasTVProvider(Provider):
"""
logger.info('Downloading archive %s', archive.id)
r = self.session.get(self.server_url + 'downloadarquivo/{}'.format(archive.id))
r.raise_for_status()
raise_for_status(r)
# open the archive
archive_stream = io.BytesIO(r.content)
@ -335,62 +420,28 @@ class LegendasTVProvider(Provider):
else:
raise ValueError('Not a valid archive')
def query(self, language, title, season=None, episode=None, year=None):
def query(self, language, title, season=None, episodes=None, year=None):
# search for titles
titles = self.search_titles(sanitize(title))
# search for titles with the quote or dot character
ignore_characters = {'\'', '.'}
if any(c in title for c in ignore_characters):
titles.update(self.search_titles(sanitize(title, ignore_characters=ignore_characters)))
titles = self.search_titles(title, season, year)
subtitles = []
# iterate over titles
for title_id, t in titles.items():
# discard mismatches on title
if sanitize(t['title']) != sanitize(title):
continue
# episode
if season and episode:
# discard mismatches on type
if t['type'] != 'episode':
continue
# discard mismatches on season
if 'season' not in t or t['season'] != season:
continue
# movie
else:
# discard mismatches on type
if t['type'] != 'movie':
continue
# discard mismatches on year
if year is not None and 'year' in t and t['year'] != year:
continue
logger.info('Getting archives for title %d and language %d', title_id, language.legendastv)
archives = self.get_archives(title_id, language.legendastv, t['type'], season, episodes or [])
if not archives:
logger.info('No archives found for title %d and language %d', title_id, language.legendastv)
# iterate over title's archives
for a in self.get_archives(title_id, language.legendastv):
# clean name of path separators and pack flags
clean_name = a.name.replace('/', '-')
if a.pack and clean_name.startswith('(p)'):
clean_name = clean_name[3:]
# guess from name
guess = guessit(clean_name, {'type': t['type']})
# episode
if season and episode:
# discard mismatches on episode in non-pack archives
if not a.pack and 'episode' in guess and guess['episode'] != episode:
continue
for a in archives:
# compute an expiration time based on the archive timestamp
expiration_time = (datetime.utcnow().replace(tzinfo=pytz.utc) - a.timestamp).total_seconds()
# attempt to get the releases from the cache
releases = region.get(releases_key.format(archive_id=a.id), expiration_time=expiration_time)
cache_key = releases_key.format(archive_id=a.id, archive_name=a.name)
releases = region.get(cache_key, expiration_time=expiration_time)
# the releases are not in cache or cache is expired
if releases == NO_VALUE:
@ -417,27 +468,34 @@ class LegendasTVProvider(Provider):
releases.append(name)
# cache the releases
region.set(releases_key.format(archive_id=a.id), releases)
region.set(cache_key, releases)
# iterate over releases
for r in releases:
subtitle = LegendasTVSubtitle(language, t['type'], t['title'], t.get('year'), t.get('imdb_id'),
t.get('season'), a, r)
subtitle = self.subtitle_class(language, t['type'], t['title'], t.get('year'), t.get('imdb_id'),
t.get('season'), a, r)
logger.debug('Found subtitle %r', subtitle)
subtitles.append(subtitle)
return subtitles
def list_subtitles(self, video, languages):
# List subtitles for `video` in every requested language by calling query() per language.
# NOTE(review): singular (`title`, `episode`) and plural (`titles`, `episodes`) variables are both
# assigned below, and a return using the singular ones precedes the loop over `titles`.
# Presumably the singular lines are the removed side of a rendered diff - confirm.
season = episode = None
season = None
episodes = []
if isinstance(video, Episode):
title = video.series
# main series name first, then the alternative names
titles = [video.series] + video.alternative_series
season = video.season
episode = video.episode
episodes = video.episodes
else:
title = video.title
# main title first, then the alternative titles
titles = [video.title] + video.alternative_titles
return [s for l in languages for s in self.query(l, title, season=season, episode=episode, year=video.year)]
# stop at the first name that yields at least one subtitle
for title in titles:
subtitles = [s for l in languages for s in
self.query(l, title, season=season, episodes=episodes, year=video.year)]
if subtitles:
return subtitles
return []
def download_subtitle(self, subtitle):
# download archive in case we previously hit the releases cache and didn't download it
@ -446,3 +504,11 @@ class LegendasTVProvider(Provider):
# extract subtitle's content
subtitle.content = fix_line_ending(subtitle.archive.content.read(subtitle.name))
def raise_for_status(r):
    """Raise an error for a response that cannot be used.

    :param r: response exposing a ``text`` attribute and a ``raise_for_status()`` method.
    :raise: ServiceUnavailable if the body contains the maintenance message; otherwise whatever
        ``r.raise_for_status()`` raises.

    """
    under_maintenance = 'Em breve estaremos de volta' in r.text
    if not under_maintenance:
        r.raise_for_status()
        return

    # The site is under maintenance even though the HTTP status code is 200, so the status
    # check alone would not catch it.
    raise ServiceUnavailable

View file

@ -5,7 +5,6 @@ from babelfish import Language
from requests import Session
from . import Provider
from .. import __short_version__
from ..subtitle import Subtitle
logger = logging.getLogger(__name__)
@ -42,11 +41,16 @@ class NapiProjektSubtitle(Subtitle):
def __init__(self, language, hash):
super(NapiProjektSubtitle, self).__init__(language)
self.hash = hash
self.content = None
@property
def id(self):
return self.hash
@property
def info(self):
return self.hash
def get_matches(self, video):
matches = set()
@ -62,10 +66,14 @@ class NapiProjektProvider(Provider):
languages = {Language.fromalpha2(l) for l in ['pl']}
required_hash = 'napiprojekt'
server_url = 'http://napiprojekt.pl/unit_napisy/dl.php'
subtitle_class = NapiProjektSubtitle
def __init__(self):
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__
self.session.headers['User-Agent'] = self.user_agent
def terminate(self):
self.session.close()
@ -81,16 +89,16 @@ class NapiProjektProvider(Provider):
'f': hash,
't': get_subhash(hash)}
logger.info('Searching subtitle %r', params)
response = self.session.get(self.server_url, params=params, timeout=10)
response.raise_for_status()
r = self.session.get(self.server_url, params=params, timeout=10)
r.raise_for_status()
# handle subtitles not found and errors
if response.content[:4] == b'NPc0':
if r.content[:4] == b'NPc0':
logger.debug('No subtitles found')
return None
subtitle = NapiProjektSubtitle(language, hash)
subtitle.content = response.content
subtitle = self.subtitle_class(language, hash)
subtitle.content = r.content
logger.debug('Found subtitle %r', subtitle)
return subtitle

View file

@ -11,9 +11,10 @@ from six.moves.xmlrpc_client import ServerProxy
from . import Provider, TimeoutSafeTransport
from .. import __short_version__
from ..exceptions import AuthenticationError, ConfigurationError, DownloadLimitExceeded, ProviderError
from ..subtitle import Subtitle, fix_line_ending, guess_matches
from ..utils import sanitize
from ..exceptions import (AuthenticationError, ConfigurationError, DownloadLimitExceeded, ProviderError,
ServiceUnavailable)
from ..matches import guess_matches
from ..subtitle import Subtitle, fix_line_ending
from ..video import Episode, Movie
logger = logging.getLogger(__name__)
@ -26,7 +27,8 @@ class OpenSubtitlesSubtitle(Subtitle):
def __init__(self, language, hearing_impaired, page_link, subtitle_id, matched_by, movie_kind, hash, movie_name,
movie_release_name, movie_year, movie_imdb_id, series_season, series_episode, filename, encoding):
super(OpenSubtitlesSubtitle, self).__init__(language, hearing_impaired, page_link, encoding)
super(OpenSubtitlesSubtitle, self).__init__(language, hearing_impaired=hearing_impaired,
page_link=page_link, encoding=encoding)
self.subtitle_id = subtitle_id
self.matched_by = matched_by
self.movie_kind = movie_kind
@ -43,6 +45,14 @@ class OpenSubtitlesSubtitle(Subtitle):
def id(self):
return str(self.subtitle_id)
@property
def info(self):
    """Most descriptive label available for this subtitle.

    :return: the release name when it is longer than the file name, otherwise the file name;
        the subtitle id when neither is set.

    """
    # Normalise missing values so the length comparison below cannot fail on ``None``
    release_name = self.movie_release_name or ''
    filename = self.filename or ''

    if not filename and not release_name:
        return self.subtitle_id
    if len(release_name) > len(filename):
        return release_name
    return self.filename
@property
def series_name(self):
return self.series_re.match(self.movie_name).group('series_name')
@ -52,60 +62,39 @@ class OpenSubtitlesSubtitle(Subtitle):
return self.series_re.match(self.movie_name).group('series_title')
def get_matches(self, video):
matches = set()
# episode
if isinstance(video, Episode) and self.movie_kind == 'episode':
# tag match, assume series, year, season and episode matches
if self.matched_by == 'tag':
matches |= {'series', 'year', 'season', 'episode'}
# series
if video.series and sanitize(self.series_name) == sanitize(video.series):
matches.add('series')
# year
if video.original_series and self.movie_year is None or video.year and video.year == self.movie_year:
matches.add('year')
# season
if video.season and self.series_season == video.season:
matches.add('season')
# episode
if video.episode and self.series_episode == video.episode:
matches.add('episode')
# title
if video.title and sanitize(self.series_title) == sanitize(video.title):
matches.add('title')
# guess
matches |= guess_matches(video, guessit(self.movie_release_name, {'type': 'episode'}))
matches |= guess_matches(video, guessit(self.filename, {'type': 'episode'}))
# hash
if 'opensubtitles' in video.hashes and self.hash == video.hashes['opensubtitles']:
if 'series' in matches and 'season' in matches and 'episode' in matches:
matches.add('hash')
else:
logger.debug('Match on hash discarded')
# movie
elif isinstance(video, Movie) and self.movie_kind == 'movie':
# tag match, assume title and year matches
if self.matched_by == 'tag':
matches |= {'title', 'year'}
# title
if video.title and sanitize(self.movie_name) == sanitize(video.title):
matches.add('title')
# year
if video.year and self.movie_year == video.year:
matches.add('year')
# guess
matches |= guess_matches(video, guessit(self.movie_release_name, {'type': 'movie'}))
matches |= guess_matches(video, guessit(self.filename, {'type': 'movie'}))
# hash
if 'opensubtitles' in video.hashes and self.hash == video.hashes['opensubtitles']:
if 'title' in matches:
matches.add('hash')
else:
logger.debug('Match on hash discarded')
else:
if (isinstance(video, Episode) and self.movie_kind != 'episode') or (
isinstance(video, Movie) and self.movie_kind != 'movie'):
logger.info('%r is not a valid movie_kind', self.movie_kind)
return matches
return set()
matches = guess_matches(video, {
'title': self.series_name if self.movie_kind == 'episode' else self.movie_name,
'episode_title': self.series_title if self.movie_kind == 'episode' else None,
'year': self.movie_year,
'season': self.series_season,
'episode': self.series_episode
})
# tag
if self.matched_by == 'tag':
if not video.imdb_id or self.movie_imdb_id == video.imdb_id:
if self.movie_kind == 'episode':
matches |= {'series', 'year', 'season', 'episode'}
elif self.movie_kind == 'movie':
matches |= {'title', 'year'}
# guess
matches |= guess_matches(video, guessit(self.movie_release_name, {'type': self.movie_kind}))
matches |= guess_matches(video, guessit(self.filename, {'type': self.movie_kind}))
# hash
if 'opensubtitles' in video.hashes and self.hash == video.hashes['opensubtitles']:
if self.movie_kind == 'movie' and 'title' in matches:
matches.add('hash')
elif self.movie_kind == 'episode' and 'series' in matches and 'season' in matches and 'episode' in matches:
matches.add('hash')
else:
logger.debug('Match on hash discarded')
# imdb_id
if video.imdb_id and self.movie_imdb_id == video.imdb_id:
@ -122,10 +111,13 @@ class OpenSubtitlesProvider(Provider):
"""
languages = {Language.fromopensubtitles(l) for l in language_converters['opensubtitles'].codes}
server_url = 'https://api.opensubtitles.org/xml-rpc'
subtitle_class = OpenSubtitlesSubtitle
user_agent = 'subliminal v%s' % __short_version__
def __init__(self, username=None, password=None):
self.server = ServerProxy('https://api.opensubtitles.org/xml-rpc', TimeoutSafeTransport(10))
if username and not password or not username and password:
self.server = ServerProxy(self.server_url, TimeoutSafeTransport(10))
if any((username, password)) and not all((username, password)):
raise ConfigurationError('Username and password must be specified')
# None values not allowed for logging in, so replace it by ''
self.username = username or ''
@ -134,8 +126,7 @@ class OpenSubtitlesProvider(Provider):
def initialize(self):
logger.info('Logging in')
response = checked(self.server.LogIn(self.username, self.password, 'eng',
'subliminal v%s' % __short_version__))
response = checked(self.server.LogIn(self.username, self.password, 'eng', self.user_agent))
self.token = response['token']
logger.debug('Logged in with token %r', self.token)
@ -156,7 +147,10 @@ class OpenSubtitlesProvider(Provider):
if hash and size:
criteria.append({'moviehash': hash, 'moviebytesize': str(size)})
if imdb_id:
criteria.append({'imdbid': imdb_id[2:]})
if season and episode:
criteria.append({'imdbid': imdb_id[2:], 'season': season, 'episode': episode})
else:
criteria.append({'imdbid': imdb_id[2:]})
if tag:
criteria.append({'tag': tag})
if query and season and episode:
@ -199,9 +193,9 @@ class OpenSubtitlesProvider(Provider):
filename = subtitle_item['SubFileName']
encoding = subtitle_item.get('SubEncoding') or None
subtitle = OpenSubtitlesSubtitle(language, hearing_impaired, page_link, subtitle_id, matched_by, movie_kind,
hash, movie_name, movie_release_name, movie_year, movie_imdb_id,
series_season, series_episode, filename, encoding)
subtitle = self.subtitle_class(language, hearing_impaired, page_link, subtitle_id, matched_by, movie_kind,
hash, movie_name, movie_release_name, movie_year, movie_imdb_id,
series_season, series_episode, filename, encoding)
logger.debug('Found subtitle %r by %s', subtitle, matched_by)
subtitles.append(subtitle)
@ -225,6 +219,17 @@ class OpenSubtitlesProvider(Provider):
subtitle.content = fix_line_ending(zlib.decompress(base64.b64decode(response['data'][0]['data']), 47))
class OpenSubtitlesVipSubtitle(OpenSubtitlesSubtitle):
    """OpenSubtitles VIP Subtitle."""
    # provider name reported for subtitles of this class
    provider_name = 'opensubtitlesvip'
class OpenSubtitlesVipProvider(OpenSubtitlesProvider):
    """OpenSubtitles Provider using VIP url."""
    # XML-RPC endpoint; the parent class builds its ServerProxy from `server_url`
    server_url = 'https://vip-api.opensubtitles.org/xml-rpc'
    # class the parent's query instantiates for every result
    subtitle_class = OpenSubtitlesVipSubtitle
class OpenSubtitlesError(ProviderError):
"""Base class for non-generic :class:`OpenSubtitlesProvider` exceptions."""
pass
@ -260,11 +265,6 @@ class DisabledUserAgent(OpenSubtitlesError, AuthenticationError):
pass
class ServiceUnavailable(OpenSubtitlesError):
"""Exception raised when status is '503 Service Unavailable'."""
pass
def checked(response):
"""Check a response status before returning it.

View file

@ -16,11 +16,10 @@ from requests import Session
from zipfile import ZipFile
from . import Provider
from .. import __short_version__
from ..exceptions import ProviderError
from ..subtitle import Subtitle, fix_line_ending, guess_matches
from ..utils import sanitize
from ..video import Episode, Movie
from ..matches import guess_matches
from ..subtitle import Subtitle, fix_line_ending
from ..video import Episode
logger = logging.getLogger(__name__)
@ -31,7 +30,7 @@ class PodnapisiSubtitle(Subtitle):
def __init__(self, language, hearing_impaired, page_link, pid, releases, title, season=None, episode=None,
year=None):
super(PodnapisiSubtitle, self).__init__(language, hearing_impaired, page_link)
super(PodnapisiSubtitle, self).__init__(language, hearing_impaired=hearing_impaired, page_link=page_link)
self.pid = pid
self.releases = releases
self.title = title
@ -43,37 +42,21 @@ class PodnapisiSubtitle(Subtitle):
def id(self):
return self.pid
def get_matches(self, video):
matches = set()
@property
def info(self):
return ' '.join(self.releases) or self.pid
# episode
if isinstance(video, Episode):
# series
if video.series and sanitize(self.title) == sanitize(video.series):
matches.add('series')
# year
if video.original_series and self.year is None or video.year and video.year == self.year:
matches.add('year')
# season
if video.season and self.season == video.season:
matches.add('season')
# episode
if video.episode and self.episode == video.episode:
matches.add('episode')
# guess
for release in self.releases:
matches |= guess_matches(video, guessit(release, {'type': 'episode'}))
# movie
elif isinstance(video, Movie):
# title
if video.title and sanitize(self.title) == sanitize(video.title):
matches.add('title')
# year
if video.year and self.year == video.year:
matches.add('year')
# guess
for release in self.releases:
matches |= guess_matches(video, guessit(release, {'type': 'movie'}))
def get_matches(self, video):
    """Compute the set of properties on which this subtitle matches `video`.

    :param video: the video to compare against.
    :return: names of the matching properties.
    :rtype: set

    """
    # properties known from the search result itself
    listed = {'title': self.title, 'year': self.year, 'season': self.season, 'episode': self.episode}
    found = guess_matches(video, listed)

    # remaining properties are guessed from each release name, parsed with the video's own type
    if isinstance(video, Episode):
        guess_type = 'episode'
    else:
        guess_type = 'movie'
    for release_name in self.releases:
        guessed = guessit(release_name, {'type': guess_type})
        found |= guess_matches(video, guessed)

    return found
@ -82,11 +65,15 @@ class PodnapisiProvider(Provider):
"""Podnapisi Provider."""
languages = ({Language('por', 'BR'), Language('srp', script='Latn')} |
{Language.fromalpha2(l) for l in language_converters['alpha2'].codes})
server_url = 'http://podnapisi.net/subtitles/'
server_url = 'https://www.podnapisi.net/subtitles/'
subtitle_class = PodnapisiSubtitle
def __init__(self):
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__
self.session.headers['User-Agent'] = self.user_agent
def terminate(self):
self.session.close()
@ -108,7 +95,9 @@ class PodnapisiProvider(Provider):
pids = set()
while True:
# query the server
xml = etree.fromstring(self.session.get(self.server_url + 'search/old', params=params, timeout=10).content)
r = self.session.get(self.server_url + 'search/old', params=params, timeout=10)
r.raise_for_status()
xml = etree.fromstring(r.content)
# exit if no results
if not int(xml.find('pagination/results').text):
@ -118,10 +107,14 @@ class PodnapisiProvider(Provider):
# loop over subtitles
for subtitle_xml in xml.findall('subtitle'):
# read xml elements
pid = subtitle_xml.find('pid').text
# ignore duplicates, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164&start=10#p213321
if pid in pids:
continue
language = Language.fromietf(subtitle_xml.find('language').text)
hearing_impaired = 'n' in (subtitle_xml.find('flags').text or '')
page_link = subtitle_xml.find('url').text
pid = subtitle_xml.find('pid').text
releases = []
if subtitle_xml.find('release').text:
for release in subtitle_xml.find('release').text.split():
@ -134,15 +127,11 @@ class PodnapisiProvider(Provider):
year = int(subtitle_xml.find('year').text)
if is_episode:
subtitle = PodnapisiSubtitle(language, hearing_impaired, page_link, pid, releases, title,
season=season, episode=episode, year=year)
subtitle = self.subtitle_class(language, hearing_impaired, page_link, pid, releases, title,
season=season, episode=episode, year=year)
else:
subtitle = PodnapisiSubtitle(language, hearing_impaired, page_link, pid, releases, title,
year=year)
# ignore duplicates, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164&start=10#p213321
if pid in pids:
continue
subtitle = self.subtitle_class(language, hearing_impaired, page_link, pid, releases, title,
year=year)
logger.debug('Found subtitle %r', subtitle)
subtitles.append(subtitle)
@ -159,11 +148,21 @@ class PodnapisiProvider(Provider):
return subtitles
def list_subtitles(self, video, languages):
season = episode = None
if isinstance(video, Episode):
return [s for l in languages for s in self.query(l, video.series, season=video.season,
episode=video.episode, year=video.year)]
elif isinstance(video, Movie):
return [s for l in languages for s in self.query(l, video.title, year=video.year)]
titles = [video.series] + video.alternative_series
season = video.season
episode = video.episode
else:
titles = [video.title] + video.alternative_titles
for title in titles:
subtitles = [s for l in languages for s in
self.query(l, title, season=season, episode=episode, year=video.year)]
if subtitles:
return subtitles
return []
def download_subtitle(self, subtitle):
# download as a zip

View file

@ -7,7 +7,6 @@ from babelfish import Language, language_converters
from requests import Session
from . import Provider
from .. import __short_version__
from ..subtitle import Subtitle, fix_line_ending
logger = logging.getLogger(__name__)
@ -28,6 +27,10 @@ class ShooterSubtitle(Subtitle):
def id(self):
return self.download_link
@property
def info(self):
    """Short description of the subtitle: the hash stored on it."""
    return self.hash
def get_matches(self, video):
matches = set()
@ -42,10 +45,14 @@ class ShooterProvider(Provider):
"""Shooter Provider."""
languages = {Language(l) for l in ['eng', 'zho']}
server_url = 'https://www.shooter.cn/api/subapi.php'
subtitle_class = ShooterSubtitle
def __init__(self):
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__
self.session.headers['User-Agent'] = self.user_agent
def terminate(self):
self.session.close()
@ -64,7 +71,7 @@ class ShooterProvider(Provider):
# parse the subtitles
results = json.loads(r.text)
subtitles = [ShooterSubtitle(language, hash, t['Link']) for s in results for t in s['Files']]
subtitles = [self.subtitle_class(language, hash, t['Link']) for s in results for t in s['Files']]
return subtitles

View file

@ -1,235 +0,0 @@
# -*- coding: utf-8 -*-
import bisect
from collections import defaultdict
import io
import json
import logging
import zipfile
from babelfish import Language
from guessit import guessit
from requests import Session
from . import ParserBeautifulSoup, Provider
from .. import __short_version__
from ..cache import SHOW_EXPIRATION_TIME, region
from ..exceptions import AuthenticationError, ConfigurationError, ProviderError
from ..subtitle import Subtitle, fix_line_ending, guess_matches
from ..utils import sanitize
from ..video import Episode, Movie
logger = logging.getLogger(__name__)
class SubsCenterSubtitle(Subtitle):
    """SubsCenter Subtitle.

    :param language: language of the subtitle.
    :param bool hearing_impaired: whether the subtitle is for hearing impaired.
    :param str page_link: URL of the page listing the subtitle.
    :param series: series name, compared with the video series for episodes.
    :param season: season number.
    :param episode: episode number.
    :param title: title, compared with the video title for movies.
    :param subtitle_id: identifier of the subtitle on the site.
    :param subtitle_key: key sent along with the download request.
    :param downloaded: download count.
    :param list releases: release names the subtitle is listed for.

    """
    provider_name = 'subscenter'

    def __init__(self, language, hearing_impaired, page_link, series, season, episode, title, subtitle_id,
                 subtitle_key, downloaded, releases):
        # pass the optional base-class parameters by keyword so the call does not depend on their order
        super(SubsCenterSubtitle, self).__init__(language, hearing_impaired=hearing_impaired, page_link=page_link)
        self.series = series
        self.season = season
        self.episode = episode
        self.title = title
        self.subtitle_id = subtitle_id
        self.subtitle_key = subtitle_key
        self.downloaded = downloaded
        self.releases = releases

    @property
    def id(self):
        return str(self.subtitle_id)

    def get_matches(self, video):
        """Compute the set of properties on which this subtitle matches `video`.

        :param video: the video to compare against.
        :return: names of the matching properties, empty if `video` is neither an episode nor a movie.
        :rtype: set

        """
        matches = set()

        if isinstance(video, Episode):
            guess_type = 'episode'
            # series
            if video.series and sanitize(self.series) == sanitize(video.series):
                matches.add('series')
            # season
            if video.season and self.season == video.season:
                matches.add('season')
            # episode
            if video.episode and self.episode == video.episode:
                matches.add('episode')
        elif isinstance(video, Movie):
            guess_type = 'movie'
            # title
            if video.title and sanitize(self.title) == sanitize(video.title):
                matches.add('title')
        else:
            # unknown kind of video: nothing can be compared
            return matches

        # guess the remaining properties from every release name
        for release in self.releases:
            matches |= guess_matches(video, guessit(release, {'type': guess_type}))

        return matches
class SubsCenterProvider(Provider):
    """SubsCenter Provider.

    :param str username: account name, or `None` to use the site without logging in.
    :param str password: account password, or `None` to use the site without logging in.
    :raise: :class:`~subliminal.exceptions.ConfigurationError` if only one of `username` and `password` is given.

    """
    languages = {Language.fromalpha2(code) for code in ['he']}
    server_url = 'http://www.subscenter.co/he/'

    def __init__(self, username=None, password=None):
        # credentials only make sense as a pair
        if (username is None) != (password is None):
            raise ConfigurationError('Username and password must be specified')

        self.session = None
        self.username = username
        self.password = password
        self.logged_in = False

    def initialize(self):
        """Open the HTTP session and log in when credentials were given.

        :raise: :class:`~subliminal.exceptions.AuthenticationError` if the login request is not answered
            with a 302 redirect.

        """
        self.session = Session()
        self.session.headers['User-Agent'] = 'Subliminal/{}'.format(__short_version__)

        # login
        if self.username is not None and self.password is not None:
            logger.debug('Logging in')
            url = self.server_url + 'subscenter/accounts/login/'

            # retrieve CSRF token (bounded like every other request so a stalled server cannot hang us)
            self.session.get(url, timeout=10)
            csrf_token = self.session.cookies['csrftoken']

            # actual login
            data = {'username': self.username, 'password': self.password, 'csrfmiddlewaretoken': csrf_token}
            r = self.session.post(url, data, allow_redirects=False, timeout=10)

            if r.status_code != 302:
                raise AuthenticationError(self.username)

            logger.info('Logged in')
            self.logged_in = True

    def terminate(self):
        """Log out if needed and close the HTTP session.

        The session is closed even when the logout request fails; the failure is still raised.

        """
        try:
            # logout
            if self.logged_in:
                logger.info('Logging out')
                r = self.session.get(self.server_url + 'subscenter/accounts/logout/', timeout=10)
                r.raise_for_status()
                logger.info('Logged out')
        finally:
            self.logged_in = False
            self.session.close()

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def _search_url_titles(self, title):
        """Search the URL titles by kind for the given `title`.

        :param str title: title to search for.
        :return: the URL titles by kind.
        :rtype: collections.defaultdict

        """
        # make the search
        logger.info('Searching title name for %r', title)
        r = self.session.get(self.server_url + 'subtitle/search/', params={'q': title}, timeout=10)
        r.raise_for_status()

        # check for redirections
        if r.history and all(h.status_code == 302 for h in r.history):
            logger.debug('Redirected to the subtitles page')
            links = [r.url]
        else:
            # get the suggestions (if needed)
            soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
            links = [link.attrs['href'] for link in soup.select('#processes div.generalWindowTop a')]
        logger.debug('Found %d suggestions', len(links))

        # links end with '<kind>/<url title>/': group the URL titles by kind
        url_titles = defaultdict(list)
        for link in links:
            parts = link.split('/')
            url_titles[parts[-3]].append(parts[-2])

        return url_titles

    def query(self, title, season=None, episode=None):
        """List the subtitles available for a title.

        :param str title: series name when `season` and `episode` are given, movie title otherwise.
        :param season: season number, for an episode.
        :param episode: episode number, for an episode.
        :return: the subtitles found, empty if no URL title is known for `title`.
        :rtype: list of :class:`SubsCenterSubtitle`

        """
        # search for the url title
        url_titles = self._search_url_titles(title)

        # episode
        if season and episode:
            if 'series' not in url_titles:
                logger.error('No URL title found for series %r', title)
                return []
            url_title = url_titles['series'][0]
            logger.debug('Using series title %r', url_title)
            url = self.server_url + 'cst/data/series/sb/{}/{}/{}/'.format(url_title, season, episode)
            page_link = self.server_url + 'subtitle/series/{}/{}/{}/'.format(url_title, season, episode)
        else:
            if 'movie' not in url_titles:
                logger.error('No URL title found for movie %r', title)
                return []
            url_title = url_titles['movie'][0]
            logger.debug('Using movie title %r', url_title)
            url = self.server_url + 'cst/data/movie/sb/{}/'.format(url_title)
            page_link = self.server_url + 'subtitle/movie/{}/'.format(url_title)

        # get the list of subtitles
        logger.debug('Getting the list of subtitles')
        r = self.session.get(url, timeout=10)
        r.raise_for_status()
        results = json.loads(r.text)

        # loop over results
        subtitles = {}
        for language_code, language_data in results.items():
            for quality_data in language_data.values():
                for subtitles_data in quality_data.values():
                    for subtitle_item in subtitles_data.values():
                        # read the item
                        language = Language.fromalpha2(language_code)
                        hearing_impaired = bool(subtitle_item['hearing_impaired'])
                        subtitle_id = subtitle_item['id']
                        subtitle_key = subtitle_item['key']
                        downloaded = subtitle_item['downloaded']
                        release = subtitle_item['subtitle_version']

                        # add the release and increment downloaded count if we already have the subtitle
                        if subtitle_id in subtitles:
                            logger.debug('Found additional release %r for subtitle %d', release, subtitle_id)
                            bisect.insort_left(subtitles[subtitle_id].releases, release)  # deterministic order
                            subtitles[subtitle_id].downloaded += downloaded
                            continue

                        # otherwise create it
                        subtitle = SubsCenterSubtitle(language, hearing_impaired, page_link, title, season, episode,
                                                      title, subtitle_id, subtitle_key, downloaded, [release])
                        logger.debug('Found subtitle %r', subtitle)
                        subtitles[subtitle_id] = subtitle

        # hand back a real list rather than a view tied to the local dict
        return list(subtitles.values())

    def list_subtitles(self, video, languages):
        """List the subtitles of `video` whose language is in `languages`."""
        season = episode = None
        title = video.title

        if isinstance(video, Episode):
            title = video.series
            season = video.season
            episode = video.episode

        return [s for s in self.query(title, season, episode) if s.language in languages]

    def download_subtitle(self, subtitle):
        """Download the archive of `subtitle` and store its single subtitle file as content.

        :raise: :class:`~subliminal.exceptions.ProviderError` if the archive does not hold exactly one
            file once the ``.txt`` members are left out.

        """
        # download
        url = self.server_url + 'subtitle/download/{}/{}/'.format(subtitle.language.alpha2, subtitle.subtitle_id)
        params = {'v': subtitle.releases[0], 'key': subtitle.subtitle_key}
        r = self.session.get(url, params=params, headers={'Referer': subtitle.page_link}, timeout=10)
        r.raise_for_status()

        # open the zip
        with zipfile.ZipFile(io.BytesIO(r.content)) as zf:
            # remove some filenames from the namelist
            namelist = [n for n in zf.namelist() if not n.endswith('.txt')]
            if not namelist:
                # report an empty archive as a provider failure instead of an IndexError
                raise ProviderError('No file to unzip')
            if len(namelist) > 1:
                raise ProviderError('More than one file to unzip')
            subtitle.content = fix_line_ending(zf.read(namelist[0]))

View file

@ -25,6 +25,10 @@ class TheSubDBSubtitle(Subtitle):
def id(self):
return self.hash + '-' + str(self.language)
@property
def info(self):
    """Short description of the subtitle: the hash stored on it."""
    return self.hash
def get_matches(self, video):
matches = set()
@ -40,11 +44,15 @@ class TheSubDBProvider(Provider):
languages = {Language.fromthesubdb(l) for l in language_converters['thesubdb'].codes}
required_hash = 'thesubdb'
server_url = 'http://api.thesubdb.com/'
subtitle_class = TheSubDBSubtitle
user_agent = 'SubDB/1.0 (subliminal/%s; https://github.com/Diaoul/subliminal)' % __short_version__
def __init__(self):
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = ('SubDB/1.0 (subliminal/%s; https://github.com/Diaoul/subliminal)' %
__short_version__)
self.session.headers['User-Agent'] = self.user_agent
def terminate(self):
self.session.close()
@ -66,7 +74,7 @@ class TheSubDBProvider(Provider):
for language_code in r.text.split(','):
language = Language.fromthesubdb(language_code)
subtitle = TheSubDBSubtitle(language, hash)
subtitle = self.subtitle_class(language, hash)
logger.debug('Found subtitle %r', subtitle)
subtitles.append(subtitle)

View file

@ -9,12 +9,10 @@ from guessit import guessit
from requests import Session
from . import ParserBeautifulSoup, Provider
from .. import __short_version__
from ..cache import EPISODE_EXPIRATION_TIME, SHOW_EXPIRATION_TIME, region
from ..exceptions import ProviderError
from ..score import get_equivalent_release_groups
from ..subtitle import Subtitle, fix_line_ending, guess_matches
from ..utils import sanitize, sanitize_release_group
from ..matches import guess_matches
from ..subtitle import Subtitle, fix_line_ending
from ..video import Episode
logger = logging.getLogger(__name__)
@ -43,31 +41,24 @@ class TVsubtitlesSubtitle(Subtitle):
def id(self):
return str(self.subtitle_id)
def get_matches(self, video):
matches = set()
@property
def info(self):
return self.release or self.rip
def get_matches(self, video):
matches = guess_matches(video, {
'title': self.series,
'season': self.season,
'episode': self.episode,
'year': self.year,
'release_group': self.release
})
# series
if video.series and sanitize(self.series) == sanitize(video.series):
matches.add('series')
# season
if video.season and self.season == video.season:
matches.add('season')
# episode
if video.episode and self.episode == video.episode:
matches.add('episode')
# year
if video.original_series and self.year is None or video.year and video.year == self.year:
matches.add('year')
# release_group
if (video.release_group and self.release and
any(r in sanitize_release_group(self.release)
for r in get_equivalent_release_groups(sanitize_release_group(video.release_group)))):
matches.add('release_group')
# other properties
if self.release:
matches |= guess_matches(video, guessit(self.release, {'type': 'episode'}), partial=True)
if self.rip:
matches |= guess_matches(video, guessit(self.rip), partial=True)
matches |= guess_matches(video, guessit(self.rip, {'type': 'episode'}), partial=True)
return matches
@ -80,10 +71,14 @@ class TVsubtitlesProvider(Provider):
]}
video_types = (Episode,)
server_url = 'http://www.tvsubtitles.net/'
subtitle_class = TVsubtitlesSubtitle
def __init__(self):
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__
self.session.headers['User-Agent'] = self.user_agent
def terminate(self):
self.session.close()
@ -158,13 +153,7 @@ class TVsubtitlesProvider(Provider):
return episode_ids
def query(self, series, season, episode, year=None):
# search the show id
show_id = self.search_show_id(series, year)
if show_id is None:
logger.error('No show id found for %r (%r)', series, {'year': year})
return []
def query(self, show_id, series, season, episode, year=None):
# get the episode ids
episode_ids = self.get_episode_ids(show_id, season)
if episode not in episode_ids:
@ -184,9 +173,9 @@ class TVsubtitlesProvider(Provider):
subtitle_id = int(row.parent['href'][10:-5])
page_link = self.server_url + 'subtitle-%d.html' % subtitle_id
rip = row.find('p', title='rip').text.strip() or None
release = row.find('p', title='release').text.strip() or None
release = row.find('h5').text.strip() or None
subtitle = TVsubtitlesSubtitle(language, page_link, subtitle_id, series, season, episode, year, rip,
subtitle = self.subtitle_class(language, page_link, subtitle_id, series, season, episode, year, rip,
release)
logger.debug('Found subtitle %s', subtitle)
subtitles.append(subtitle)
@ -194,7 +183,24 @@ class TVsubtitlesProvider(Provider):
return subtitles
def list_subtitles(self, video, languages):
return [s for s in self.query(video.series, video.season, video.episode, video.year) if s.language in languages]
# lookup show_id
titles = [video.series] + video.alternative_series
show_id = None
for title in titles:
show_id = self.search_show_id(title, video.year)
if show_id is not None:
break
# query for subtitles with the show_id
if show_id is not None:
subtitles = [s for s in self.query(show_id, title, video.season, video.episode, video.year)
if s.language in languages and s.episode == video.episode]
if subtitles:
return subtitles
else:
logger.error('No show id found for %r (%r)', video.series, {'year': video.year})
return []
def download_subtitle(self, subtitle):
# download as a zip

View file

@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
import logging
from ..extensions import provider_manager, default_providers
from ..utils import hash_napiprojekt, hash_opensubtitles, hash_shooter, hash_thesubdb
logger = logging.getLogger(__name__)
# Hash function to apply to the video, keyed by provider name.
# 'opensubtitlesvip' maps to the same function as 'opensubtitles'.
hash_functions = {
    'napiprojekt': hash_napiprojekt,
    'opensubtitles': hash_opensubtitles,
    'opensubtitlesvip': hash_opensubtitles,
    'shooter': hash_shooter,
    'thesubdb': hash_thesubdb
}
def refine(video, providers=None, languages=None, **kwargs):
    """Refine a video computing required hashes for the given providers.

    The following :class:`~subliminal.video.Video` attribute can be found:

      * :attr:`~subliminal.video.Video.hashes`

    :param video: the video to refine; its `size` and `name` are read and its `hashes` are filled in.
    :param providers: names of the providers to compute hashes for, defaults to `default_providers`.
    :param languages: when given, providers whose `check_languages` returns nothing for them are skipped.

    Additional keyword arguments are accepted and ignored.

    """
    # an unknown size cannot be compared with the threshold below (TypeError on Python 3)
    if video.size is None:
        logger.warning('Size is unknown: hashes not computed')
        return

    if video.size <= 10485760:
        logger.warning('Size is lower than 10MB: hashes not computed')
        return

    logger.debug('Computing hashes for %r', video.name)
    for name in providers or default_providers:
        # skip providers without a hash function before looking up their plugin
        if name not in hash_functions:
            continue

        provider = provider_manager[name].plugin
        if not provider.check_types(video):
            continue

        if languages and not provider.check_languages(languages):
            continue

        video.hashes[name] = hash_functions[name](video.name)

    logger.debug('Computed hashes %r', video.hashes)

View file

@ -45,13 +45,13 @@ def refine(video, embedded_subtitles=True, **kwargs):
# video codec
if video_track.codec_id == 'V_MPEG4/ISO/AVC':
video.video_codec = 'h264'
video.video_codec = 'H.264'
logger.debug('Found video_codec %s', video.video_codec)
elif video_track.codec_id == 'V_MPEG4/ISO/SP':
video.video_codec = 'DivX'
logger.debug('Found video_codec %s', video.video_codec)
elif video_track.codec_id == 'V_MPEG4/ISO/ASP':
video.video_codec = 'XviD'
video.video_codec = 'Xvid'
logger.debug('Found video_codec %s', video.video_codec)
else:
logger.warning('MKV has no video track')
@ -61,7 +61,7 @@ def refine(video, embedded_subtitles=True, **kwargs):
audio_track = mkv.audio_tracks[0]
# audio codec
if audio_track.codec_id == 'A_AC3':
video.audio_codec = 'AC3'
video.audio_codec = 'Dolby Digital'
logger.debug('Found audio_codec %s', video.audio_codec)
elif audio_track.codec_id == 'A_DTS':
video.audio_codec = 'DTS'

View file

@ -7,7 +7,6 @@ import requests
from .. import __short_version__
from ..cache import REFINER_EXPIRATION_TIME, region
from ..video import Episode, Movie
from ..utils import sanitize
logger = logging.getLogger(__name__)
@ -68,7 +67,8 @@ class OMDBClient(object):
return j
omdb_client = OMDBClient(headers={'User-Agent': 'Subliminal/%s' % __short_version__})
user_agent = 'Subliminal/%s' % __short_version__
omdb_client = OMDBClient(headers={'User-Agent': user_agent})
@region.cache_on_arguments(expiration_time=REFINER_EXPIRATION_TIME)
@ -89,7 +89,7 @@ def search(title, type, year):
return all_results
def refine(video, **kwargs):
def refine(video, apikey=None, **kwargs):
"""Refine a video by searching `OMDb API <http://omdbapi.com/>`_.
Several :class:`~subliminal.video.Episode` attributes can be found:
@ -105,6 +105,12 @@ def refine(video, **kwargs):
* :attr:`~subliminal.video.Video.imdb_id`
"""
if not apikey:
logger.warning('No apikey. Skipping omdb refiner.')
return
omdb_client.session.params['apikey'] = apikey
if isinstance(video, Episode):
# exit if the information is complete
if video.series_imdb_id:
@ -119,7 +125,7 @@ def refine(video, **kwargs):
logger.debug('Found %d results', len(results))
# filter the results
results = [r for r in results if sanitize(r['Title']) == sanitize(video.series)]
results = [r for r in results if video.matches(r['Title'])]
if not results:
logger.warning('No matching series found')
return
@ -154,12 +160,12 @@ def refine(video, **kwargs):
# search the movie
results = search(video.title, 'movie', video.year)
if not results:
logger.warning('No results')
logger.warning('No results for movie')
return
logger.debug('Found %d results', len(results))
# filter the results
results = [r for r in results if sanitize(r['Title']) == sanitize(video.title)]
results = [r for r in results if video.matches(r['Title'])]
if not results:
logger.warning('No matching movie found')
return

View file

@ -4,6 +4,8 @@ from functools import wraps
import logging
import re
from babelfish import Country
import guessit
import requests
from .. import __short_version__
@ -190,8 +192,14 @@ class TVDBClient(object):
return r.json()['data']
#: User-Agent to use
user_agent = 'Subliminal/%s' % __short_version__
#: Configured instance of :class:`TVDBClient`
tvdb_client = TVDBClient('5EC930FB90DA1ADA', headers={'User-Agent': 'Subliminal/%s' % __short_version__})
tvdb_client = TVDBClient('5EC930FB90DA1ADA', headers={'User-Agent': user_agent})
#: Configure guessit in order to use GuessitCountryConverter
guessit.api.configure()
@region.cache_on_arguments(expiration_time=REFINER_EXPIRATION_TIME)
@ -294,21 +302,33 @@ def refine(video, **kwargs):
# iterate over series names
for series_name in series_names:
# parse as series and year
# parse as series, year and country
series, year, country = series_re.match(series_name).groups()
if year:
year = int(year)
if country:
country = Country.fromguessit(country)
# discard mismatches on year
if year and (video.original_series or video.year != year):
logger.debug('Discarding series name %r mismatch on year %d', series, year)
continue
# discard mismatches on country
if video.country and video.country != country:
logger.debug('Discarding series name %r mismatch on country %r', series, country)
continue
# match on sanitized series name
if sanitize(series) == sanitize(video.series):
logger.debug('Found exact match on series %r', series_name)
matching_result['match'] = {'series': original_match['series'], 'year': series_year,
'original_series': original_match['year'] is None}
matching_result['match'] = {
'series': original_match['series'],
'year': series_year or year,
'country': country,
'original_series': original_match['year'] is None and country is None
}
break
# add the result on match
@ -331,7 +351,9 @@ def refine(video, **kwargs):
# add series information
logger.debug('Found series %r', series)
video.series = matching_result['match']['series']
video.alternative_series.extend(series['aliases'])
video.year = matching_result['match']['year']
video.country = matching_result['match']['country']
video.original_series = matching_result['match']['original_series']
video.series_tvdb_id = series['id']
video.series_imdb_id = series['imdbId'] or None

View file

@ -13,11 +13,13 @@ Available matches:
* hash
* title
* year
* country
* series
* season
* episode
* release_group
* format
* streaming_service
* source
* audio_codec
* resolution
* hearing_impaired
@ -36,15 +38,19 @@ logger = logging.getLogger(__name__)
#: Scores for episodes
episode_scores = {'hash': 359, 'series': 180, 'year': 90, 'season': 30, 'episode': 30, 'release_group': 15,
'format': 7, 'audio_codec': 3, 'resolution': 2, 'video_codec': 2, 'hearing_impaired': 1}
episode_scores = {'hash': 809, 'series': 405, 'year': 135, 'country': 135, 'season': 45, 'episode': 45,
'release_group': 15, 'streaming_service': 15, 'source': 7, 'audio_codec': 3, 'resolution': 2,
'video_codec': 2, 'hearing_impaired': 1}
#: Scores for movies
movie_scores = {'hash': 119, 'title': 60, 'year': 30, 'release_group': 15,
'format': 7, 'audio_codec': 3, 'resolution': 2, 'video_codec': 2, 'hearing_impaired': 1}
movie_scores = {'hash': 269, 'title': 135, 'year': 45, 'country': 45, 'release_group': 15, 'streaming_service': 15,
'source': 7, 'audio_codec': 3, 'resolution': 2, 'video_codec': 2, 'hearing_impaired': 1}
#: All score names, for episodes and movies alike
score_keys = set(episode_scores) | set(movie_scores)
#: Equivalent release groups
equivalent_release_groups = ({'LOL', 'DIMENSION'}, {'ASAP', 'IMMERSE', 'FLEET'})
equivalent_release_groups = ({'LOL', 'DIMENSION'}, {'ASAP', 'IMMERSE', 'FLEET'}, {'AVS', 'SVA'})
def get_equivalent_release_groups(release_group):
@ -118,20 +124,20 @@ def compute_score(subtitle, video, hearing_impaired=None):
matches.add('episode')
if 'series_imdb_id' in matches:
logger.debug('Adding series_imdb_id match equivalent')
matches |= {'series', 'year'}
matches |= {'series', 'year', 'country'}
if 'imdb_id' in matches:
logger.debug('Adding imdb_id match equivalents')
matches |= {'series', 'year', 'season', 'episode'}
matches |= {'series', 'year', 'country', 'season', 'episode'}
if 'tvdb_id' in matches:
logger.debug('Adding tvdb_id match equivalents')
matches |= {'series', 'year', 'season', 'episode'}
matches |= {'series', 'year', 'country', 'season', 'episode'}
if 'series_tvdb_id' in matches:
logger.debug('Adding series_tvdb_id match equivalents')
matches |= {'series', 'year'}
matches |= {'series', 'year', 'country'}
elif isinstance(video, Movie):
if 'imdb_id' in matches:
logger.debug('Adding imdb_id match equivalents')
matches |= {'title', 'year'}
matches |= {'title', 'year', 'country'}
# handle hearing impaired
if hearing_impaired is not None and subtitle.hearing_impaired == hearing_impaired:
@ -151,31 +157,41 @@ def compute_score(subtitle, video, hearing_impaired=None):
def solve_episode_equations():
from sympy import Eq, solve, symbols
hash, series, year, season, episode, release_group = symbols('hash series year season episode release_group')
format, audio_codec, resolution, video_codec = symbols('format audio_codec resolution video_codec')
hash, series, year, country, season, episode = symbols('hash series year country season episode')
release_group, streaming_service, source = symbols('release_group streaming_service source')
audio_codec, resolution, video_codec = symbols('audio_codec resolution video_codec')
hearing_impaired = symbols('hearing_impaired')
equations = [
# hash is best
Eq(hash, series + year + season + episode + release_group + format + audio_codec + resolution + video_codec),
Eq(hash, series + year + country + season + episode +
release_group + streaming_service + source + audio_codec + resolution + video_codec),
# series counts for the most part in the total score
Eq(series, year + season + episode + release_group + format + audio_codec + resolution + video_codec + 1),
Eq(series, year + country + season + episode + release_group + streaming_service + source +
audio_codec + resolution + video_codec + 1),
# year is the second most important part
Eq(year, season + episode + release_group + format + audio_codec + resolution + video_codec + 1),
Eq(year, season + episode + release_group + streaming_service + source +
audio_codec + resolution + video_codec + 1),
# year counts as much as country
Eq(year, country),
# season is important too
Eq(season, release_group + format + audio_codec + resolution + video_codec + 1),
Eq(season, release_group + streaming_service + source + audio_codec + resolution + video_codec + 1),
# episode is equally important to season
Eq(episode, season),
# release group is the next most wanted match
Eq(release_group, format + audio_codec + resolution + video_codec + 1),
Eq(release_group, source + audio_codec + resolution + video_codec + 1),
# format counts as much as audio_codec, resolution and video_codec
Eq(format, audio_codec + resolution + video_codec),
# streaming service counts as much as release group
Eq(release_group, streaming_service),
# source counts as much as audio_codec, resolution and video_codec
Eq(source, audio_codec + resolution + video_codec),
# audio_codec is more valuable than video_codec
Eq(audio_codec, video_codec + 1),
@ -190,32 +206,40 @@ def solve_episode_equations():
Eq(hearing_impaired, 1),
]
return solve(equations, [hash, series, year, season, episode, release_group, format, audio_codec, resolution,
hearing_impaired, video_codec])
return solve(equations, [hash, series, year, country, season, episode, release_group, streaming_service, source,
audio_codec, resolution, hearing_impaired, video_codec])
def solve_movie_equations():
from sympy import Eq, solve, symbols
hash, title, year, release_group = symbols('hash title year release_group')
format, audio_codec, resolution, video_codec = symbols('format audio_codec resolution video_codec')
hearing_impaired = symbols('hearing_impaired')
hash, title, year, country, release_group = symbols('hash title year country release_group')
streaming_service, source, audio_codec, resolution = symbols('streaming_service source audio_codec resolution')
video_codec, hearing_impaired = symbols('video_codec hearing_impaired')
equations = [
# hash is best
Eq(hash, title + year + release_group + format + audio_codec + resolution + video_codec),
Eq(hash, title + year + country + release_group + streaming_service +
source + audio_codec + resolution + video_codec),
# title counts for the most part in the total score
Eq(title, year + release_group + format + audio_codec + resolution + video_codec + 1),
Eq(title, year + country + release_group + streaming_service +
source + audio_codec + resolution + video_codec + 1),
# year is the second most important part
Eq(year, release_group + format + audio_codec + resolution + video_codec + 1),
Eq(year, release_group + streaming_service + source + audio_codec + resolution + video_codec + 1),
# year counts as much as country
Eq(year, country),
# release group is the next most wanted match
Eq(release_group, format + audio_codec + resolution + video_codec + 1),
Eq(release_group, source + audio_codec + resolution + video_codec + 1),
# format counts as much as audio_codec, resolution and video_codec
Eq(format, audio_codec + resolution + video_codec),
# streaming service counts as much as release group
Eq(release_group, streaming_service),
# source counts as much as audio_codec, resolution and video_codec
Eq(source, audio_codec + resolution + video_codec),
# audio_codec is more valuable than video_codec
Eq(audio_codec, video_codec + 1),
@ -230,5 +254,5 @@ def solve_movie_equations():
Eq(hearing_impaired, 1),
]
return solve(equations, [hash, title, year, release_group, format, audio_codec, resolution, hearing_impaired,
video_codec])
return solve(equations, [hash, title, year, country, release_group, streaming_service, source, audio_codec,
resolution, hearing_impaired, video_codec])

View file

@ -6,10 +6,7 @@ import os
import chardet
import pysrt
from .score import get_equivalent_release_groups
from .video import Episode, Movie
from .utils import sanitize, sanitize_release_group
from six import text_type
logger = logging.getLogger(__name__)
@ -60,6 +57,11 @@ class Subtitle(object):
"""Unique identifier of the subtitle"""
raise NotImplementedError
@property
def info(self):
    """Human-readable description of the subtitle.

    Usually the subtitle name, intended for GUI rendering. This base
    implementation has no value to offer and must be overridden.

    :raise: :class:`NotImplementedError` in this base implementation.

    """
    raise NotImplementedError()
@property
def text(self):
"""Content as string
@ -70,10 +72,17 @@ class Subtitle(object):
if not self.content:
return
if self.encoding:
return self.content.decode(self.encoding, errors='replace')
if not isinstance(self.content, text_type):
if self.encoding:
return self.content.decode(self.encoding, errors='replace')
return self.content.decode(self.guess_encoding(), errors='replace')
guessed_encoding = self.guess_encoding()
if guessed_encoding:
return self.content.decode(guessed_encoding, errors='replace')
return None
return self.content
def is_valid(self):
"""Check if a :attr:`text` is a valid SubRip format.
@ -145,6 +154,18 @@ class Subtitle(object):
return encoding
def get_path(self, video, single=False):
    """Compute where this subtitle should be stored for the given `video`.

    The path is derived from the video's ``name`` by :func:`get_subtitle_path`.

    :param video: the video the subtitle is for.
    :type video: :class:`~subliminal.video.Video`
    :param bool single: when true, no language is passed to :func:`get_subtitle_path`;
        otherwise the subtitle's own language is passed.
    :return: path of the subtitle.
    :rtype: str

    """
    video_path = video.name
    language = None if single else self.language
    return get_subtitle_path(video_path, language)
def get_matches(self, video):
"""Get the matches against the `video`.
@ -182,68 +203,6 @@ def get_subtitle_path(video_path, language=None, extension='.srt'):
return subtitle_root + extension
def guess_matches(video, guess, partial=False):
    """Collect the names of the properties on which a `guess` agrees with a `video`.

    When the guess is `partial`, the absence of a year in the guess is not
    counted as a match.

    :param video: the video.
    :type video: :class:`~subliminal.video.Video`
    :param dict guess: the guess.
    :param bool partial: whether or not the guess is partial.
    :return: matches between the `video` and the `guess`.
    :rtype: set

    """
    found = set()

    def same(key, expected):
        # a falsy video attribute (None, 0, '') is never considered a match
        return expected and key in guess and guess[key] == expected

    if isinstance(video, Episode):
        # the series name is guessed as 'title', the episode title as 'episode_title'
        if video.series and 'title' in guess and sanitize(guess['title']) == sanitize(video.series):
            found.add('series')
        if video.title and 'episode_title' in guess and sanitize(guess['episode_title']) == sanitize(video.title):
            found.add('title')
        if same('season', video.season):
            found.add('season')
        if same('episode', video.episode):
            found.add('episode')
        if same('year', video.year):
            found.add('year')
        # for an original series, a full guess without any year is information too
        if not (partial or not video.original_series or 'year' in guess):
            found.add('year')
    elif isinstance(video, Movie):
        if same('year', video.year):
            found.add('year')
        if video.title and 'title' in guess and sanitize(guess['title']) == sanitize(video.title):
            found.add('title')

    # the remaining properties are compared for every kind of video

    # release_group, accepting any group equivalent to the video's one
    if video.release_group and 'release_group' in guess:
        guessed_group = sanitize_release_group(guess['release_group'])
        if guessed_group in get_equivalent_release_groups(sanitize_release_group(video.release_group)):
            found.add('release_group')
    # resolution is guessed under the 'screen_size' key
    if same('screen_size', video.resolution):
        found.add('resolution')
    # format is compared case-insensitively
    if video.format and 'format' in guess and guess['format'].lower() == video.format.lower():
        found.add('format')
    if same('video_codec', video.video_codec):
        found.add('video_codec')
    if same('audio_codec', video.audio_codec):
        found.add('audio_codec')

    return found
def fix_line_ending(content):
"""Fix line ending of `content` by changing it to \n.

View file

@ -1,88 +0,0 @@
# -*- coding: utf-8 -*-
from datetime import time
class Component(object):
    """Base class for the pieces making up the text of a cue.

    :param components: sub-components of this one. A list is stored as-is
        (not copied), `None` yields an empty list and any other value is
        wrapped in a single-element list.

    """
    tag_name = 'Component'

    def __init__(self, components=None):
        if isinstance(components, list):
            children = components
        elif components is None:
            children = []
        else:
            children = [components]
        self.components = children

    def __iter__(self):
        return iter(self.components)

    def __len__(self):
        return len(self.components)

    def __str__(self):
        # plain text: the concatenated text of the sub-components, without tags
        return ''.join(map(str, self.components))

    def __repr__(self):
        inner = ''.join(map(repr, self.components))
        return '<{name}>{components}</{name}>'.format(name=self.tag_name, components=inner)
class Bold(Component):
    """:class:`Component` rendered with the ``b`` (bold) tag."""

    tag_name = 'b'
class Italic(Component):
    """:class:`Component` rendered with the ``i`` (italic) tag."""

    tag_name = 'i'
class Underline(Component):
    """:class:`Component` rendered with the ``u`` (underline) tag."""

    tag_name = 'u'
class Strikethrough(Component):
    """:class:`Component` rendered with the ``s`` (strikethrough) tag."""

    tag_name = 's'
class Font(Component):
    """:class:`Component` rendered with the ``font`` tag and a color.

    :param color: color of the font, shown quoted in the representation.
    :param \*args: positional parameters for the :class:`Component` constructor.
    :param \*\*kwargs: keyword parameters for the :class:`Component` constructor.

    """
    tag_name = 'font'

    def __init__(self, color, *args, **kwargs):
        super(Font, self).__init__(*args, **kwargs)
        self.color = color

    def __repr__(self):
        inner = ''.join(map(repr, self.components))
        return '<{name} "{color}">{components}</{name}>'.format(name=self.tag_name, color=self.color,
                                                                components=inner)
class Cue(object):
    """One subtitle cue: a time span and the components displayed during it.

    :param datetime.time start_time: start time.
    :param datetime.time end_time: end time.
    :param list components: cue components.

    """
    def __init__(self, start_time, end_time, components):
        self.start_time, self.end_time = start_time, end_time
        self.components = components

    def __repr__(self):
        text = ''.join(map(repr, self.components))
        return '<Cue [{start_time}->{end_time}] "{text}">'.format(start_time=self.start_time,
                                                                  end_time=self.end_time, text=text)
if __name__ == '__main__':
    # Manual smoke test: build a one-hour cue holding a bold 'Hello' and show it.
    cue = Cue(time(), time(1), [Bold('Hello')])
    # print() call form: the Python 2 print statement is a SyntaxError on Python 3,
    # while a single parenthesized argument prints identically on both.
    print(repr(cue))

View file

@ -1,82 +0,0 @@
# -*- coding: utf-8 -*-
import re
from datetime import time
from subliminal.subtitles import Cue
index_re = re.compile(r'(?P<index>\d+)')
timing_re = re.compile(r'(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2}),(?P<milliseconds>\d{3})')
class SubripReadError(Exception):
    """Raised when SubRip content cannot be read."""
class SubripReadIndexError(SubripReadError):
    """:class:`SubripReadError` raised on a problem with a cue index line."""
class SubripReader(object):
    """SubRip reader keeping track of a reading state.

    A new reader starts in the :attr:`INDEX` state.

    NOTE(review): looks unfinished - :meth:`read` is a stub and
    :meth:`read_line` only handles the :attr:`INDEX` state.

    """
    # possible values of ``state`` - presumably the kind of line expected next
    INDEX = 1
    TIMINGS = 2
    TEXT = 3

    def __init__(self):
        self.state = self.INDEX

    def read(self, content):
        """Placeholder: ignores `content` and returns `None`."""
        return None

    def read_line(self, line):
        """Check a single `line` against the current state.

        :param str line: the line to read.
        :raise: :class:`SubripReadIndexError` if the reader is in the
            :attr:`INDEX` state and the line starts with digits.

        """
        # NOTE(review): this raises when the line DOES look like an index, whereas
        # read_cue raises when it does not - confirm which one is intended.
        if self.state == self.INDEX and index_re.match(line):
            raise SubripReadIndexError
def read_cue(stream):
    """Attempt to parse the index and timings of the next cue from the `stream`.

    Leading blank lines are skipped, then an index line and a
    ``start --> end`` timings line are expected.

    NOTE(review): as before, the cue text is not consumed and nothing is
    returned - the parsed timings are only validated for now.

    :param stream: text stream to read from; only its ``readline`` method is used.
    :raise: :class:`SubripReadIndexError` if the first non-blank line is not a cue index.
    :raise: :class:`SubripReadError` if the stream ends before a cue starts or
        if the timings line is malformed.

    """
    def parse_time(text):
        match = timing_re.match(text.strip())
        if not match:
            raise SubripReadError
        # groups are strings and datetime.time has no 'milliseconds' parameter,
        # so convert explicitly instead of passing the group dict as keywords
        parts = dict((name, int(value)) for name, value in match.groupdict().items())
        try:
            return time(parts['hour'], parts['minute'], parts['second'], parts['milliseconds'] * 1000)
        except ValueError:
            # two-digit fields may still be out of range (e.g. hour 24+)
            raise SubripReadError

    # skip blank lines; readline() returns '' at end of stream, which must stop
    # the loop, while a blank line is '\n' and has to be stripped to be detected
    while True:
        line = stream.readline()
        if not line:
            raise SubripReadError
        if line.strip():
            break

    # parse index
    if not index_re.match(line):
        raise SubripReadIndexError

    # parse timings
    line = stream.readline()
    if '-->' not in line:
        raise SubripReadError
    timings = line.split('-->')
    if len(timings) != 2:
        raise SubripReadError

    # start time is left of the arrow, end time right of it
    start_time = parse_time(timings[0])
    end_time = parse_time(timings[1])
class SubripSubtitle(object):
    """SubRip subtitle holding a list of cues, empty on creation."""

    def __init__(self):
        #: Cues of the subtitle
        self.cues = list()
if __name__ == '__main__':
print read_cue('toto')
i = 0
for x in read_cue('toto'):
print x
if i > 10:
break
i += 1

View file

@ -1,10 +1,21 @@
# -*- coding: utf-8 -*-
import logging
from datetime import datetime
import hashlib
import os
import re
import socket
import struct
import requests
from requests.exceptions import SSLError
from six.moves.xmlrpc_client import ProtocolError
from .exceptions import ServiceUnavailable
logger = logging.getLogger(__name__)
def hash_opensubtitles(video_path):
"""Compute a hash using OpenSubtitles' algorithm.
@ -106,7 +117,7 @@ def sanitize(string, ignore_characters=None):
ignore_characters = ignore_characters or set()
# replace some characters with one space
characters = {'-', ':', '(', ')', '.'} - ignore_characters
characters = {'-', ':', '(', ')', '.', ','} - ignore_characters
if characters:
string = re.sub(r'[%s]' % re.escape(''.join(characters)), ' ', string)
@ -150,3 +161,48 @@ def timestamp(date):
"""
return (date - datetime(1970, 1, 1)).total_seconds()
def matches_title(actual, title, alternative_titles):
    """Tell whether `actual` designates the `title` or one of the `alternative_titles`.

    All titles are compared in their sanitized form. Besides an exact match
    on the title or on an alternative title, `actual` also matches when it is
    the title followed by an alternative title.

    :param str actual: the actual title to check
    :param str title: the expected title
    :param list alternative_titles: the expected alternative_titles
    :return: whether the actual title matches the title or alternative_titles.
    :rtype: bool

    """
    candidate = sanitize(actual)
    expected = sanitize(title)
    if candidate == expected:
        return True

    alternatives = {sanitize(alternative) for alternative in alternative_titles}
    if candidate in alternatives:
        return True

    # last chance: "<title> <alternative title>"
    if not candidate.startswith(expected):
        return False
    return candidate[len(expected):].strip() in alternatives
def handle_exception(e, msg):
    """Handle exception, logging the proper error message followed by `msg`.

    Exception traceback is only logged for specific cases: HTTP errors whose
    status code is not a 5xx, SSL errors other than a read timeout, and
    unexpected errors.

    :param exception e: The exception to handle.
    :param str msg: The message to log.

    """
    if isinstance(e, (requests.Timeout, socket.timeout)):
        logger.error('Request timed out. %s', msg)
    elif isinstance(e, (ServiceUnavailable, ProtocolError)):
        # OpenSubtitles raises xmlrpclib.ProtocolError when unavailable
        logger.error('Service unavailable. %s', msg)
    elif isinstance(e, requests.exceptions.HTTPError):
        # the response is None when the error was raised without one attached;
        # the handler must not fail itself in that case
        status_code = getattr(e.response, 'status_code', None)
        logger.error('HTTP error %r. %s', status_code, msg,
                     exc_info=status_code not in range(500, 600))
    elif isinstance(e, SSLError):
        # an SSLError built without arguments has no reason to report
        reason = e.args[0] if e.args else None
        logger.error('SSL error %r. %s', reason, msg,
                     exc_info=reason != 'The read operation timed out')
    else:
        logger.exception('Unexpected error. %s', msg)

View file

@ -5,6 +5,9 @@ import logging
import os
from guessit import guessit
from rebulk.loose import ensure_list
from subliminal.utils import matches_title
logger = logging.getLogger(__name__)
@ -12,10 +15,10 @@ logger = logging.getLogger(__name__)
VIDEO_EXTENSIONS = ('.3g2', '.3gp', '.3gp2', '.3gpp', '.60d', '.ajp', '.asf', '.asx', '.avchd', '.avi', '.bik',
'.bix', '.box', '.cam', '.dat', '.divx', '.dmf', '.dv', '.dvr-ms', '.evo', '.flc', '.fli',
'.flic', '.flv', '.flx', '.gvi', '.gvp', '.h264', '.m1v', '.m2p', '.m2ts', '.m2v', '.m4e',
'.m4v', '.mjp', '.mjpeg', '.mjpg', '.mkv', '.moov', '.mov', '.movhd', '.movie', '.movx', '.mp4',
'.mpe', '.mpeg', '.mpg', '.mpv', '.mpv2', '.mxf', '.nsv', '.nut', '.ogg', '.ogm' '.ogv', '.omf',
'.ps', '.qt', '.ram', '.rm', '.rmvb', '.swf', '.ts', '.vfw', '.vid', '.video', '.viv', '.vivo',
'.vob', '.vro', '.wm', '.wmv', '.wmx', '.wrap', '.wvx', '.wx', '.x264', '.xvid')
'.m4v', '.mjp', '.mjpeg', '.mjpg', '.mk3d', '.mkv', '.moov', '.mov', '.movhd', '.movie', '.movx',
'.mp4', '.mpe', '.mpeg', '.mpg', '.mpv', '.mpv2', '.mxf', '.nsv', '.nut', '.ogg', '.ogm', '.ogv',
'.omf', '.ps', '.qt', '.ram', '.rm', '.rmvb', '.swf', '.ts', '.vfw', '.vid', '.video', '.viv',
'.vivo', '.vob', '.vro', '.webm', '.wm', '.wmv', '.wmx', '.wrap', '.wvx', '.wx', '.x264', '.xvid')
class Video(object):
@ -24,8 +27,9 @@ class Video(object):
Represent a video, existing or not.
:param str name: name or path of the video.
:param str format: format of the video (HDTV, WEB-DL, BluRay, ...).
:param str source: source of the video (HDTV, Web, Blu-ray, ...).
:param str release_group: release group of the video.
:param str streaming_service: streaming_service of the video.
:param str resolution: resolution of the video stream (480p, 720p, 1080p or 1080i).
:param str video_codec: codec of the video stream.
:param str audio_codec: codec of the main audio stream.
@ -35,17 +39,20 @@ class Video(object):
:param set subtitle_languages: existing subtitle languages.
"""
def __init__(self, name, format=None, release_group=None, resolution=None, video_codec=None, audio_codec=None,
imdb_id=None, hashes=None, size=None, subtitle_languages=None):
def __init__(self, name, source=None, release_group=None, resolution=None, streaming_service=None,
video_codec=None, audio_codec=None, imdb_id=None, hashes=None, size=None, subtitle_languages=None):
#: Name or path of the video
self.name = name
#: Format of the video (HDTV, WEB-DL, BluRay, ...)
self.format = format
#: Source of the video (HDTV, Web, Blu-ray, ...)
self.source = source
#: Release group of the video
self.release_group = release_group
#: Streaming service of the video
self.streaming_service = streaming_service
#: Resolution of the video stream (480p, 720p, 1080p or 1080i)
self.resolution = resolution
@ -118,16 +125,19 @@ class Episode(Video):
:param str series: series of the episode.
:param int season: season number of the episode.
:param int episode: episode number of the episode.
:param int or list episodes: episode numbers of the episode.
:param str title: title of the episode.
:param int year: year of the series.
:param country: Country of the series.
:type country: :class:`~babelfish.country.Country`
:param bool original_series: whether the series is the first with this name.
:param int tvdb_id: TVDB id of the episode.
:param list alternative_series: alternative names of the series
:param \*\*kwargs: additional parameters for the :class:`Video` constructor.
"""
def __init__(self, name, series, season, episode, title=None, year=None, original_series=True, tvdb_id=None,
series_tvdb_id=None, series_imdb_id=None, **kwargs):
def __init__(self, name, series, season, episodes, title=None, year=None, country=None, original_series=True,
tvdb_id=None, series_tvdb_id=None, series_imdb_id=None, alternative_series=None, **kwargs):
super(Episode, self).__init__(name, **kwargs)
#: Series of the episode
@ -136,8 +146,8 @@ class Episode(Video):
#: Season number of the episode
self.season = season
#: Episode number of the episode
self.episode = episode
#: Episode numbers of the episode
self.episodes = ensure_list(episodes)
#: Title of the episode
self.title = title
@ -148,6 +158,9 @@ class Episode(Video):
#: The series is the first with this name
self.original_series = original_series
#: Country of the series
self.country = country
#: TVDB id of the episode
self.tvdb_id = tvdb_id
@ -157,6 +170,16 @@ class Episode(Video):
#: IMDb id of the series
self.series_imdb_id = series_imdb_id
#: Alternative names of the series
self.alternative_series = alternative_series or []
@property
def episode(self):
    """Lowest of the episode numbers, or `None` when there is no episode number."""
    if not self.episodes:
        return None
    return min(self.episodes)
def matches(self, series):
    """Tell whether `series` matches the series of this episode or one of its alternative names.

    :param str series: the series name to check.
    :return: the result of :func:`~subliminal.utils.matches_title`.

    """
    return matches_title(series, title=self.series, alternative_titles=self.alternative_series)
@classmethod
def fromguess(cls, name, guess):
if guess['type'] != 'episode':
@ -165,9 +188,12 @@ class Episode(Video):
if 'title' not in guess or 'episode' not in guess:
raise ValueError('Insufficient data to process the guess')
return cls(name, guess['title'], guess.get('season', 1), guess['episode'], title=guess.get('episode_title'),
year=guess.get('year'), format=guess.get('format'), original_series='year' not in guess,
release_group=guess.get('release_group'), resolution=guess.get('screen_size'),
return cls(name, guess['title'], guess.get('season', 1), guess.get('episode'), title=guess.get('episode_title'),
year=guess.get('year'), country=guess.get('country'),
original_series='year' not in guess and 'country' not in guess,
source=guess.get('source'), alternative_series=ensure_list(guess.get('alternative_title')),
release_group=guess.get('release_group'), streaming_service=guess.get('streaming_service'),
resolution=guess.get('screen_size'),
video_codec=guess.get('video_codec'), audio_codec=guess.get('audio_codec'))
@classmethod
@ -175,10 +201,13 @@ class Episode(Video):
return cls.fromguess(name, guessit(name, {'type': 'episode'}))
def __repr__(self):
if self.year is None:
return '<%s [%r, %dx%d]>' % (self.__class__.__name__, self.series, self.season, self.episode)
return '<%s [%r, %d, %dx%d]>' % (self.__class__.__name__, self.series, self.year, self.season, self.episode)
return '<{cn} [{series}{open}{country}{sep}{year}{close} s{season:02d}e{episodes}]>'.format(
cn=self.__class__.__name__, series=self.series, year=self.year or '', country=self.country or '',
season=self.season, episodes='-'.join(map(lambda v: '{:02d}'.format(v), self.episodes)),
open=' (' if not self.original_series else '',
sep=') (' if self.year and self.country else '',
close=')' if not self.original_series else ''
)
class Movie(Video):
@ -186,10 +215,13 @@ class Movie(Video):
:param str title: title of the movie.
:param int year: year of the movie.
:param country: Country of the movie.
:type country: :class:`~babelfish.country.Country`
:param list alternative_titles: alternative titles of the movie
:param \*\*kwargs: additional parameters for the :class:`Video` constructor.
"""
def __init__(self, name, title, year=None, **kwargs):
def __init__(self, name, title, year=None, country=None, alternative_titles=None, **kwargs):
super(Movie, self).__init__(name, **kwargs)
#: Title of the movie
@ -198,6 +230,15 @@ class Movie(Video):
#: Year of the movie
self.year = year
#: Country of the movie
self.country = country
#: Alternative titles of the movie
self.alternative_titles = alternative_titles or []
def matches(self, title):
    """Tell whether `title` matches the title of this movie or one of its alternative titles.

    :param str title: the title to check.
    :return: the result of :func:`~subliminal.utils.matches_title`.

    """
    expected = self.title
    return matches_title(title, expected, alternative_titles=self.alternative_titles)
@classmethod
def fromguess(cls, name, guess):
if guess['type'] != 'movie':
@ -206,16 +247,20 @@ class Movie(Video):
if 'title' not in guess:
raise ValueError('Insufficient data to process the guess')
return cls(name, guess['title'], format=guess.get('format'), release_group=guess.get('release_group'),
return cls(name, guess['title'], source=guess.get('source'), release_group=guess.get('release_group'),
streaming_service=guess.get('streaming_service'),
resolution=guess.get('screen_size'), video_codec=guess.get('video_codec'),
audio_codec=guess.get('audio_codec'), year=guess.get('year'))
alternative_titles=ensure_list(guess.get('alternative_title')),
audio_codec=guess.get('audio_codec'), year=guess.get('year'), country=guess.get('country'))
@classmethod
def fromname(cls, name):
    """Build an instance from a `name`, guessed with guessit forced to the ``movie`` type.

    :param str name: name or path of the video.
    :return: the result of :meth:`fromguess` for the guess made on `name`.

    """
    guess = guessit(name, {'type': 'movie'})
    return cls.fromguess(name, guess)
def __repr__(self):
if self.year is None:
return '<%s [%r]>' % (self.__class__.__name__, self.title)
return '<%s [%r, %d]>' % (self.__class__.__name__, self.title, self.year)
return '<{cn} [{title}{open}{country}{sep}{year}{close}]>'.format(
cn=self.__class__.__name__, title=self.title, year=self.year or '', country=self.country or '',
open=' (' if self.year or self.country else '',
sep=') (' if self.year and self.country else '',
close=')' if self.year or self.country else ''
)