Updates vendored subliminal to 2.1.0

Updates rarfile to 3.1
Updates stevedore to 3.5.0
Updates appdirs to 1.4.4
Updates click to 8.1.3
Updates decorator to 5.1.1
Updates dogpile.cache to 1.1.8
Updates pbr to 5.11.0
Updates pysrt to 1.1.2
Updates pytz to 2022.6
Adds importlib-metadata version 3.1.1
Adds typing-extensions version 4.1.1
Adds zipp version 3.11.0
This commit is contained in:
Labrys of Knossos 2022-11-29 00:08:39 -05:00
commit f05b09f349
694 changed files with 16621 additions and 11056 deletions

View file

@ -4,6 +4,7 @@ import logging
from bs4 import BeautifulSoup, FeatureNotFound
from six.moves.xmlrpc_client import SafeTransport
from .. import __short_version__
from ..video import Episode, Movie
logger = logging.getLogger(__name__)
@ -68,6 +69,12 @@ class Provider(object):
#: Required hash, if any
required_hash = None
#: Subtitle class to use
subtitle_class = None
#: User Agent to use
user_agent = 'Subliminal/%s' % __short_version__
def __enter__(self):
self.initialize()
return self
@ -111,13 +118,41 @@ class Provider(object):
:rtype: bool
"""
if not isinstance(video, cls.video_types):
if not cls.check_types(video):
return False
if cls.required_hash is not None and cls.required_hash not in video.hashes:
return False
return True
@classmethod
def check_types(cls, video):
    """Tell whether the provider can handle this kind of `video`.

    A `video` is accepted only when it is an instance of one of the classes listed in
    :attr:`video_types`.

    :param video: the video to check.
    :type video: :class:`~subliminal.video.Video`
    :return: `True` if the `video` type is supported, `False` otherwise.
    :rtype: bool

    """
    supported_types = cls.video_types
    return isinstance(video, supported_types)
@classmethod
def check_languages(cls, languages):
    """Restrict `languages` to the ones the provider supports.

    The result is the intersection of :attr:`languages` with the requested `languages`.

    :param languages: the languages to check.
    :type languages: set of :class:`~babelfish.language.Language`
    :return: subset of the supported languages.
    :rtype: set of :class:`~babelfish.language.Language`

    """
    supported = cls.languages & languages
    return supported
def query(self, *args, **kwargs):
"""Query the provider for subtitles.

View file

@ -7,20 +7,22 @@ from guessit import guessit
from requests import Session
from . import ParserBeautifulSoup, Provider
from .. import __short_version__
from ..cache import SHOW_EXPIRATION_TIME, region
from ..exceptions import AuthenticationError, ConfigurationError, DownloadLimitExceeded, TooManyRequests
from ..score import get_equivalent_release_groups
from ..subtitle import Subtitle, fix_line_ending, guess_matches
from ..utils import sanitize, sanitize_release_group
from ..exceptions import AuthenticationError, ConfigurationError, DownloadLimitExceeded
from ..matches import guess_matches
from ..subtitle import Subtitle, fix_line_ending
from ..utils import sanitize
from ..video import Episode
logger = logging.getLogger(__name__)
language_converters.register('addic7ed = subliminal.converters.addic7ed:Addic7edConverter')
# Series cell matching regex
show_cells_re = re.compile(b'<td class="version">.*?</td>', re.DOTALL)
#: Series header parsing regex
series_year_re = re.compile(r'^(?P<series>[ \w\'.:(),&!?-]+?)(?: \((?P<year>\d{4})\))?$')
series_year_re = re.compile(r'^(?P<series>[ \w\'.:(),*&!?-]+?)(?: \((?P<year>\d{4})\))?$')
class Addic7edSubtitle(Subtitle):
@ -29,7 +31,7 @@ class Addic7edSubtitle(Subtitle):
def __init__(self, language, hearing_impaired, page_link, series, season, episode, title, year, version,
download_link):
super(Addic7edSubtitle, self).__init__(language, hearing_impaired, page_link)
super(Addic7edSubtitle, self).__init__(language, hearing_impaired=hearing_impaired, page_link=page_link)
self.series = series
self.season = season
self.episode = episode
@ -42,37 +44,31 @@ class Addic7edSubtitle(Subtitle):
def id(self):
return self.download_link
def get_matches(self, video):
matches = set()
@property
def info(self):
    """Human-readable description built from series, year, season/episode, title and version.

    The year is wrapped in parentheses only when set, the title is preceded by ``' - '`` only when
    set, and the version is preceded by ``' - '`` only when set.

    """
    has_year = bool(self.year)
    has_title = bool(self.title)
    has_version = bool(self.version)

    template = '{series}{yopen}{year}{yclose} s{season:02d}e{episode:02d}{topen}{title}{tclose}{version}'
    return template.format(
        series=self.series,
        yopen=' (' if has_year else '',
        year=self.year or '',
        yclose=')' if has_year else '',
        season=self.season,
        episode=self.episode,
        topen=' - ' if has_title else '',
        title=self.title,
        tclose=' - ' if has_version else '',
        version=self.version,
    )
def get_matches(self, video):
# series name
matches = guess_matches(video, {
'title': self.series,
'season': self.season,
'episode': self.episode,
'episode_title': self.title,
'year': self.year,
'release_group': self.version,
})
# series
if video.series and sanitize(self.series) == sanitize(video.series):
matches.add('series')
# season
if video.season and self.season == video.season:
matches.add('season')
# episode
if video.episode and self.episode == video.episode:
matches.add('episode')
# title
if video.title and sanitize(self.title) == sanitize(video.title):
matches.add('title')
# year
if video.original_series and self.year is None or video.year and video.year == self.year:
matches.add('year')
# release_group
if (video.release_group and self.version and
any(r in sanitize_release_group(self.version)
for r in get_equivalent_release_groups(sanitize_release_group(video.release_group)))):
matches.add('release_group')
# resolution
if video.resolution and self.version and video.resolution in self.version.lower():
matches.add('resolution')
# format
if video.format and self.version and video.format.lower() in self.version.lower():
matches.add('format')
# other properties
matches |= guess_matches(video, guessit(self.version), partial=True)
if self.version:
matches |= guess_matches(video, guessit(self.version, {'type': 'episode'}), partial=True)
return matches
@ -86,21 +82,23 @@ class Addic7edProvider(Provider):
]}
video_types = (Episode,)
server_url = 'http://www.addic7ed.com/'
subtitle_class = Addic7edSubtitle
def __init__(self, username=None, password=None):
if username is not None and password is None or username is None and password is not None:
if any((username, password)) and not all((username, password)):
raise ConfigurationError('Username and password must be specified')
self.username = username
self.password = password
self.logged_in = False
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__
self.session.headers['User-Agent'] = self.user_agent
# login
if self.username is not None and self.password is not None:
if self.username and self.password:
logger.info('Logging in')
data = {'username': self.username, 'password': self.password, 'Submit': 'Log in'}
r = self.session.post(self.server_url + 'dologin.php', data, allow_redirects=False, timeout=10)
@ -134,7 +132,16 @@ class Addic7edProvider(Provider):
logger.info('Getting show ids')
r = self.session.get(self.server_url + 'shows.php', timeout=10)
r.raise_for_status()
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
# LXML parser seems to fail when parsing Addic7ed.com HTML markup.
# Last known version to work properly is 3.6.4 (next version, 3.7.0, fails)
# Assuming the site's markup is bad, and stripping it down to only contain what's needed.
show_cells = re.findall(show_cells_re, r.content)
if show_cells:
soup = ParserBeautifulSoup(b''.join(show_cells), ['lxml', 'html.parser'])
else:
# If RegEx fails, fall back to original r.content and use 'html.parser'
soup = ParserBeautifulSoup(r.content, ['html.parser'])
# populate the show ids
show_ids = {}
@ -164,10 +171,8 @@ class Addic7edProvider(Provider):
# make the search
logger.info('Searching show ids with %r', params)
r = self.session.get(self.server_url + 'search.php', params=params, timeout=10)
r = self.session.get(self.server_url + 'srch.php', params=params, timeout=10)
r.raise_for_status()
if r.status_code == 304:
raise TooManyRequests()
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
# get the suggestion
@ -218,24 +223,23 @@ class Addic7edProvider(Provider):
# search as last resort
if not show_id:
logger.warning('Series not found in show ids')
logger.warning('Series %s not found in show ids', series)
show_id = self._search_show_id(series)
return show_id
def query(self, series, season, year=None, country=None):
# get the show id
show_id = self.get_show_id(series, year, country)
if show_id is None:
logger.error('No show id found for %r (%r)', series, {'year': year, 'country': country})
return []
def query(self, show_id, series, season, year=None, country=None):
# get the page of the season of the show
logger.info('Getting the page of show id %d, season %d', show_id, season)
r = self.session.get(self.server_url + 'show/%d' % show_id, params={'season': season}, timeout=10)
r = self.session.get(self.server_url + 'show/%d' % show_id, params={'season': season}, timeout=10)
r.raise_for_status()
if r.status_code == 304:
raise TooManyRequests()
if not r.content:
# Provider returns a status of 304 Not Modified with an empty content
# raise_for_status won't raise exception for that status code
logger.debug('No data returned from provider')
return []
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
# loop over subtitle rows
@ -262,16 +266,32 @@ class Addic7edProvider(Provider):
version = cells[4].text
download_link = cells[9].a['href'][1:]
subtitle = Addic7edSubtitle(language, hearing_impaired, page_link, series, season, episode, title, year,
version, download_link)
subtitle = self.subtitle_class(language, hearing_impaired, page_link, series, season, episode, title, year,
version, download_link)
logger.debug('Found subtitle %r', subtitle)
subtitles.append(subtitle)
return subtitles
def list_subtitles(self, video, languages):
return [s for s in self.query(video.series, video.season, video.year)
if s.language in languages and s.episode == video.episode]
# lookup show_id
titles = [video.series] + video.alternative_series
show_id = None
for title in titles:
show_id = self.get_show_id(title, video.year)
if show_id is not None:
break
# query for subtitles with the show_id
if show_id is not None:
subtitles = [s for s in self.query(show_id, title, video.season, video.year)
if s.language in languages and s.episode == video.episode]
if subtitles:
return subtitles
else:
logger.error('No show id found for %r (%r)', video.series, {'year': video.year})
return []
def download_subtitle(self, subtitle):
# download the subtitle
@ -280,6 +300,12 @@ class Addic7edProvider(Provider):
timeout=10)
r.raise_for_status()
if not r.content:
# Provider returns a status of 304 Not Modified with an empty content
# raise_for_status won't raise exception for that status code
logger.debug('Unable to download subtitle. No data returned from provider')
return
# detect download limit exceeded
if r.headers['Content-Type'] == 'text/html':
raise DownloadLimitExceeded

View file

@ -0,0 +1,135 @@
# -*- coding: utf-8 -*-
import io
import json
import logging
from zipfile import ZipFile
from babelfish import Language
from guessit import guessit
from requests import Session
from six.moves import urllib
from . import Provider
from ..cache import EPISODE_EXPIRATION_TIME, region
from ..exceptions import ProviderError
from ..matches import guess_matches
from ..subtitle import Subtitle, fix_line_ending
from ..video import Episode
logger = logging.getLogger(__name__)
class ArgenteamSubtitle(Subtitle):
    """Subtitle entry produced by the Argenteam provider.

    :param language: language of the subtitle.
    :param str download_link: URL the subtitle archive is downloaded from; also used as the page
        link and as the subtitle id.
    :param str series: series name the subtitle was searched with.
    :param int season: season number.
    :param int episode: episode number.
    :param release: value of the release's `team` field, as passed by the provider.
    :param version: value of the release's `tags` field, as passed by the provider.

    """
    provider_name = 'argenteam'

    def __init__(self, language, download_link, series, season, episode, release, version):
        # Hand the link over by keyword. The second positional parameter of Subtitle.__init__ is
        # `hearing_impaired` (the other providers call it as
        # `__init__(language, hearing_impaired=..., page_link=...)`), so a positional link would be
        # stored as the hearing-impaired flag and the page link would stay unset.
        super(ArgenteamSubtitle, self).__init__(language, page_link=download_link)
        self.download_link = download_link
        self.series = series
        self.season = season
        self.episode = episode
        self.release = release
        self.version = version

    @property
    def id(self):
        """Unique identifier of the subtitle: its download link."""
        return self.download_link

    @property
    def info(self):
        """Last path segment of the download link, URL-unquoted."""
        return urllib.parse.unquote(self.download_link.rsplit('/', 1)[-1])

    def get_matches(self, video):
        """Compute the set of properties this subtitle shares with `video`.

        :param video: the video to compare against.
        :return: names of the matching properties.
        :rtype: set

        """
        matches = guess_matches(video, {
            'title': self.series,
            'season': self.season,
            'episode': self.episode,
            'release_group': self.version
        })

        # Without a version string there is nothing left to inspect or to feed to guessit.
        if not self.version:
            return matches

        # resolution
        if video.resolution and video.resolution in self.version.lower():
            matches.add('resolution')

        # other properties guessed from the version string
        matches |= guess_matches(video, guessit(self.version, {'type': 'episode'}), partial=True)
        return matches
class ArgenteamProvider(Provider):
    """Argenteam provider: episode subtitles in a single language (alpha2 code ``es``)."""
    provider_name = 'argenteam'
    language = Language.fromalpha2('es')
    languages = {language}
    video_types = (Episode,)
    server_url = "http://argenteam.net/api/v1/"
    subtitle_class = ArgenteamSubtitle

    def __init__(self):
        # the HTTP session is created in initialize()
        self.session = None

    def initialize(self):
        """Open the HTTP session and set its User-Agent header."""
        self.session = Session()
        self.session.headers['User-Agent'] = self.user_agent

    def terminate(self):
        """Close the HTTP session."""
        self.session.close()

    @region.cache_on_arguments(expiration_time=EPISODE_EXPIRATION_TIME, should_cache_fn=lambda value: value)
    def search_episode_id(self, series, season, episode):
        """Look up the provider's id for one episode.

        An id is returned only when the search yields exactly one result; the decorator caches
        truthy results only.

        :param str series: series of the episode.
        :param int season: season of the episode.
        :param int episode: episode number.
        :return: the episode id, if any.
        :rtype: int or None

        """
        # make the search
        query = '%s S%#02dE%#02d' % (series, season, episode)
        logger.info('Searching episode id for %r', query)
        response = self.session.get(self.server_url + 'search', params={'q': query}, timeout=10)
        response.raise_for_status()
        payload = json.loads(response.text)

        if payload['total'] != 1:
            logger.error('No episode id found for %r', series)
            return None
        return payload['results'][0]['id']

    def query(self, series, season, episode):
        """Fetch every subtitle listed for the given episode.

        :param str series: series of the episode.
        :param int season: season of the episode.
        :param int episode: episode number.
        :return: the subtitles found, one per subtitle entry of each release.
        :rtype: list of :attr:`subtitle_class`

        """
        episode_id = self.search_episode_id(series, season, episode)
        if episode_id is None:
            return []

        response = self.session.get(self.server_url + 'episode', params={'id': episode_id}, timeout=10)
        response.raise_for_status()
        content = json.loads(response.text)

        found = []
        for release in content['releases']:
            team, tags = release['team'], release['tags']
            for entry in release['subtitles']:
                subtitle = self.subtitle_class(self.language, entry['uri'], series, season, episode, team, tags)
                logger.debug('Found subtitle %r', subtitle)
                found.append(subtitle)

        return found

    def list_subtitles(self, video, languages):
        """Query with the series name, then each alternative name, until one yields subtitles.

        `languages` is not used here.

        """
        for candidate in [video.series] + video.alternative_series:
            found = self.query(candidate, video.season, video.episode)
            if found:
                return found

        return []

    def download_subtitle(self, subtitle):
        """Download the zip archive behind `subtitle.download_link` and store its single member.

        :raise: :class:`ProviderError` if the archive holds more than one file.

        """
        # download as a zip
        logger.info('Downloading subtitle %r', subtitle)
        response = self.session.get(subtitle.download_link, timeout=10)
        response.raise_for_status()

        # open the zip
        archive = io.BytesIO(response.content)
        with ZipFile(archive) as zf:
            members = zf.namelist()
            if len(members) > 1:
                raise ProviderError('More than one file to unzip')

            subtitle.content = fix_line_ending(zf.read(members[0]))

View file

@ -12,14 +12,16 @@ from guessit import guessit
import pytz
import rarfile
from rarfile import RarFile, is_rarfile
from rebulk.loose import ensure_list
from requests import Session
from zipfile import ZipFile, is_zipfile
from . import ParserBeautifulSoup, Provider
from .. import __short_version__
from ..cache import SHOW_EXPIRATION_TIME, region
from ..exceptions import AuthenticationError, ConfigurationError, ProviderError
from ..subtitle import SUBTITLE_EXTENSIONS, Subtitle, fix_line_ending, guess_matches, sanitize
from ..exceptions import AuthenticationError, ConfigurationError, ProviderError, ServiceUnavailable
from ..matches import guess_matches
from ..subtitle import SUBTITLE_EXTENSIONS, Subtitle, fix_line_ending
from ..utils import sanitize
from ..video import Episode, Movie
logger = logging.getLogger(__name__)
@ -44,8 +46,11 @@ rating_re = re.compile(r'nota (?P<rating>\d+)')
#: Timestamp parsing regex
timestamp_re = re.compile(r'(?P<day>\d+)/(?P<month>\d+)/(?P<year>\d+) - (?P<hour>\d+):(?P<minute>\d+)')
#: Title with year/country regex
title_re = re.compile(r'^(?P<series>.*?)(?: \((?:(?P<year>\d{4})|(?P<country>[A-Z]{2}))\))?$')
#: Cache key for releases
releases_key = __name__ + ':releases|{archive_id}'
releases_key = __name__ + ':releases|{archive_id}|{archive_name}'
class LegendasTVArchive(object):
@ -60,8 +65,8 @@ class LegendasTVArchive(object):
:param int rating: rating (0-10).
:param timestamp: timestamp.
:type timestamp: datetime.datetime
"""
def __init__(self, id, name, pack, featured, link, downloads=0, rating=0, timestamp=None):
#: Identifier
self.id = id
@ -96,10 +101,11 @@ class LegendasTVArchive(object):
class LegendasTVSubtitle(Subtitle):
"""LegendasTV Subtitle."""
provider_name = 'legendastv'
def __init__(self, language, type, title, year, imdb_id, season, archive, name):
super(LegendasTVSubtitle, self).__init__(language, archive.link)
super(LegendasTVSubtitle, self).__init__(language, page_link=archive.link)
self.type = type
self.title = title
self.year = year
@ -112,40 +118,28 @@ class LegendasTVSubtitle(Subtitle):
def id(self):
return '%s-%s' % (self.archive.id, self.name.lower())
@property
def info(self):
return self.name
def get_matches(self, video, hearing_impaired=False):
matches = set()
matches = guess_matches(video, {
'title': self.title,
'year': self.year
})
# episode
if isinstance(video, Episode) and self.type == 'episode':
# series
if video.series and sanitize(self.title) == sanitize(video.series):
matches.add('series')
# year (year is based on season air date hence the adjustment)
if video.original_series and self.year is None or video.year and video.year == self.year - self.season + 1:
matches.add('year')
# imdb_id
if video.series_imdb_id and self.imdb_id == video.series_imdb_id:
matches.add('series_imdb_id')
# movie
elif isinstance(video, Movie) and self.type == 'movie':
# title
if video.title and sanitize(self.title) == sanitize(video.title):
matches.add('title')
# year
if video.year and self.year == video.year:
matches.add('year')
# imdb_id
if video.imdb_id and self.imdb_id == video.imdb_id:
matches.add('imdb_id')
# archive name
matches |= guess_matches(video, guessit(self.archive.name, {'type': self.type}))
# name
matches |= guess_matches(video, guessit(self.name, {'type': self.type}))
@ -157,29 +151,38 @@ class LegendasTVProvider(Provider):
:param str username: username.
:param str password: password.
"""
languages = {Language.fromlegendastv(l) for l in language_converters['legendastv'].codes}
server_url = 'http://legendas.tv/'
subtitle_class = LegendasTVSubtitle
def __init__(self, username=None, password=None):
if username and not password or not username and password:
# Provider needs UNRAR installed. If not available raise ConfigurationError
try:
rarfile.custom_check([rarfile.UNRAR_TOOL], True)
except rarfile.RarExecError:
raise ConfigurationError('UNRAR tool not available')
if any((username, password)) and not all((username, password)):
raise ConfigurationError('Username and password must be specified')
self.username = username
self.password = password
self.logged_in = False
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__
self.session.headers['User-Agent'] = self.user_agent
# login
if self.username is not None and self.password is not None:
if self.username and self.password:
logger.info('Logging in')
data = {'_method': 'POST', 'data[User][username]': self.username, 'data[User][password]': self.password}
r = self.session.post(self.server_url + 'login', data, allow_redirects=False, timeout=10)
r.raise_for_status()
raise_for_status(r)
soup = ParserBeautifulSoup(r.content, ['html.parser'])
if soup.find('div', {'class': 'alert-error'}, string=re.compile(u'Usuário ou senha inválidos')):
@ -193,94 +196,174 @@ class LegendasTVProvider(Provider):
if self.logged_in:
logger.info('Logging out')
r = self.session.get(self.server_url + 'users/logout', allow_redirects=False, timeout=10)
r.raise_for_status()
raise_for_status(r)
logger.debug('Logged out')
self.logged_in = False
self.session.close()
@region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
def search_titles(self, title):
@staticmethod
def is_valid_title(title, title_id, sanitized_title, season, year):
    """Decide whether a search result is the title that was asked for.

    :param dict title: candidate title; the `title` and `type` keys are read and, when present,
        `season` and `year`.
    :param int title_id: identifier of the candidate, only used in log messages.
    :param str sanitized_title: sanitized form of the wanted title.
    :param season: wanted season; a truthy value means an episode is wanted, otherwise a movie.
    :param year: wanted year, only compared for movies; `None` disables the comparison.
    :return: `True` when the candidate is accepted, `None` when it is discarded.

    """
    candidate_name = sanitize(title['title'])
    if candidate_name != sanitized_title:
        logger.debug("Mismatched title, discarding title %d (%s)",
                     title_id, candidate_name)
        return

    candidate_type = title['type']

    if season:
        # an episode is wanted: both the type and the season have to agree
        if candidate_type != 'episode':
            logger.debug("Mismatched 'episode' type, discarding title %d (%s)", title_id, candidate_name)
            return

        if 'season' not in title or title['season'] != season:
            logger.debug('Mismatched season %s, discarding title %d (%s)',
                         title.get('season'), title_id, candidate_name)
            return

        return True

    # a movie is wanted: the type has to agree, and the year when both sides know it
    if candidate_type != 'movie':
        logger.debug("Mismatched 'movie' type, discarding title %d (%s)", title_id, candidate_name)
        return

    year_conflicts = year is not None and 'year' in title and title['year'] != year
    if year_conflicts:
        logger.debug("Mismatched movie year, discarding title %d (%s)", title_id, candidate_name)
        return

    return True
@region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME, should_cache_fn=lambda value: value)
def search_titles(self, title, season, title_year):
"""Search for titles matching the `title`.
For episodes, each season has its own title
:param str title: the title to search for.
:param int season: season of the title
:param int title_year: year of the title
:return: found titles.
:rtype: dict
"""
# make the query
logger.info('Searching title %r', title)
r = self.session.get(self.server_url + 'legenda/sugestao/{}'.format(title), timeout=10)
r.raise_for_status()
results = json.loads(r.text)
# loop over results
titles = {}
for result in results:
source = result['_source']
sanitized_titles = [sanitize(title)]
ignore_characters = {'\'', '.'}
if any(c in title for c in ignore_characters):
sanitized_titles.append(sanitize(title, ignore_characters=ignore_characters))
# extract id
title_id = int(source['id_filme'])
for sanitized_title in sanitized_titles:
# make the query
if season:
logger.info('Searching episode title %r for season %r', sanitized_title, season)
else:
logger.info('Searching movie title %r', sanitized_title)
# extract type and title
title = {'type': type_map[source['tipo']], 'title': source['dsc_nome']}
r = self.session.get(self.server_url + 'legenda/sugestao/{}'.format(sanitized_title), timeout=10)
raise_for_status(r)
results = json.loads(r.text)
# extract year
if source['dsc_data_lancamento'] and source['dsc_data_lancamento'].isdigit():
title['year'] = int(source['dsc_data_lancamento'])
# loop over results
for result in results:
source = result['_source']
# extract imdb_id
if source['id_imdb'] != '0':
if not source['id_imdb'].startswith('tt'):
title['imdb_id'] = 'tt' + source['id_imdb'].zfill(7)
else:
title['imdb_id'] = source['id_imdb']
# extract id
title_id = int(source['id_filme'])
# extract season
if title['type'] == 'episode':
if source['temporada'] and source['temporada'].isdigit():
title['season'] = int(source['temporada'])
else:
match = season_re.search(source['dsc_nome_br'])
if match:
title['season'] = int(match.group('season'))
# extract type
title = {'type': type_map[source['tipo']]}
# extract title, year and country
name, year, country = title_re.match(source['dsc_nome']).groups()
title['title'] = name
# extract imdb_id
if source['id_imdb'] != '0':
if not source['id_imdb'].startswith('tt'):
title['imdb_id'] = 'tt' + source['id_imdb'].zfill(7)
else:
logger.warning('No season detected for title %d', title_id)
title['imdb_id'] = source['id_imdb']
# add title
titles[title_id] = title
# extract season
if title['type'] == 'episode':
if source['temporada'] and source['temporada'].isdigit():
title['season'] = int(source['temporada'])
else:
match = season_re.search(source['dsc_nome_br'])
if match:
title['season'] = int(match.group('season'))
else:
logger.debug('No season detected for title %d (%s)', title_id, name)
logger.debug('Found %d titles', len(titles))
# extract year
if year:
title['year'] = int(year)
elif source['dsc_data_lancamento'] and source['dsc_data_lancamento'].isdigit():
# year is based on season air date hence the adjustment
title['year'] = int(source['dsc_data_lancamento']) - title.get('season', 1) + 1
# add title only if is valid
# Check against title without ignored chars
if self.is_valid_title(title, title_id, sanitized_titles[0], season, title_year):
titles[title_id] = title
logger.debug('Found %d titles', len(titles))
return titles
@region.cache_on_arguments(expiration_time=timedelta(minutes=15).total_seconds())
def get_archives(self, title_id, language_code):
"""Get the archive list from a given `title_id` and `language_code`.
def get_archives(self, title_id, language_code, title_type, season, episodes):
"""Get the archive list from a given `title_id`, `language_code`, `title_type`, `season` and `episode`.
:param int title_id: title id.
:param int language_code: language code.
:param str title_type: episode or movie
:param int season: season
:param list episodes: episodes
:return: the archives.
:rtype: list of :class:`LegendasTVArchive`
"""
logger.info('Getting archives for title %d and language %d', title_id, language_code)
archives = []
page = 1
page = 0
while True:
# get the archive page
url = self.server_url + 'util/carrega_legendas_busca_filme/{title}/{language}/-/{page}'.format(
title=title_id, language=language_code, page=page)
url = self.server_url + 'legenda/busca/-/{language}/-/{page}/{title}'.format(
language=language_code, page=page, title=title_id)
r = self.session.get(url)
r.raise_for_status()
raise_for_status(r)
# parse the results
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
for archive_soup in soup.select('div.list_element > article > div'):
for archive_soup in soup.select('div.list_element > article > div > div.f_left'):
# create archive
archive = LegendasTVArchive(archive_soup.a['href'].split('/')[2], archive_soup.a.text,
'pack' in archive_soup['class'], 'destaque' in archive_soup['class'],
archive = LegendasTVArchive(archive_soup.a['href'].split('/')[2],
archive_soup.a.text,
'pack' in archive_soup.parent['class'],
'destaque' in archive_soup.parent['class'],
self.server_url + archive_soup.a['href'][1:])
# clean name of path separators and pack flags
clean_name = archive.name.replace('/', '-')
if archive.pack and clean_name.startswith('(p)'):
clean_name = clean_name[3:]
# guess from name
guess = guessit(clean_name, {'type': title_type})
# episode
if season and episodes:
# discard mismatches on episode in non-pack archives
# Guessit may return int for single episode or list for multi-episode
# Check if archive name has multiple episodes releases on it
if not archive.pack and 'episode' in guess:
wanted_episode = set(episodes)
archive_episode = set(ensure_list(guess['episode']))
if not wanted_episode.intersection(archive_episode):
logger.debug('Mismatched episode %s, discarding archive: %s', guess['episode'], clean_name)
continue
# extract text containing downloads, rating and timestamp
data_text = archive_soup.find('p', class_='data').text
@ -300,6 +383,8 @@ class LegendasTVProvider(Provider):
raise ProviderError('Archive timestamp is in the future')
# add archive
logger.info('Found archive for title %d and language %d at page %s: %s',
title_id, language_code, page, archive)
archives.append(archive)
# stop on last page
@ -322,7 +407,7 @@ class LegendasTVProvider(Provider):
"""
logger.info('Downloading archive %s', archive.id)
r = self.session.get(self.server_url + 'downloadarquivo/{}'.format(archive.id))
r.raise_for_status()
raise_for_status(r)
# open the archive
archive_stream = io.BytesIO(r.content)
@ -335,62 +420,28 @@ class LegendasTVProvider(Provider):
else:
raise ValueError('Not a valid archive')
def query(self, language, title, season=None, episode=None, year=None):
def query(self, language, title, season=None, episodes=None, year=None):
# search for titles
titles = self.search_titles(sanitize(title))
# search for titles with the quote or dot character
ignore_characters = {'\'', '.'}
if any(c in title for c in ignore_characters):
titles.update(self.search_titles(sanitize(title, ignore_characters=ignore_characters)))
titles = self.search_titles(title, season, year)
subtitles = []
# iterate over titles
for title_id, t in titles.items():
# discard mismatches on title
if sanitize(t['title']) != sanitize(title):
continue
# episode
if season and episode:
# discard mismatches on type
if t['type'] != 'episode':
continue
# discard mismatches on season
if 'season' not in t or t['season'] != season:
continue
# movie
else:
# discard mismatches on type
if t['type'] != 'movie':
continue
# discard mismatches on year
if year is not None and 'year' in t and t['year'] != year:
continue
logger.info('Getting archives for title %d and language %d', title_id, language.legendastv)
archives = self.get_archives(title_id, language.legendastv, t['type'], season, episodes or [])
if not archives:
logger.info('No archives found for title %d and language %d', title_id, language.legendastv)
# iterate over title's archives
for a in self.get_archives(title_id, language.legendastv):
# clean name of path separators and pack flags
clean_name = a.name.replace('/', '-')
if a.pack and clean_name.startswith('(p)'):
clean_name = clean_name[3:]
# guess from name
guess = guessit(clean_name, {'type': t['type']})
# episode
if season and episode:
# discard mismatches on episode in non-pack archives
if not a.pack and 'episode' in guess and guess['episode'] != episode:
continue
for a in archives:
# compute an expiration time based on the archive timestamp
expiration_time = (datetime.utcnow().replace(tzinfo=pytz.utc) - a.timestamp).total_seconds()
# attempt to get the releases from the cache
releases = region.get(releases_key.format(archive_id=a.id), expiration_time=expiration_time)
cache_key = releases_key.format(archive_id=a.id, archive_name=a.name)
releases = region.get(cache_key, expiration_time=expiration_time)
# the releases are not in cache or cache is expired
if releases == NO_VALUE:
@ -417,27 +468,34 @@ class LegendasTVProvider(Provider):
releases.append(name)
# cache the releases
region.set(releases_key.format(archive_id=a.id), releases)
region.set(cache_key, releases)
# iterate over releases
for r in releases:
subtitle = LegendasTVSubtitle(language, t['type'], t['title'], t.get('year'), t.get('imdb_id'),
t.get('season'), a, r)
subtitle = self.subtitle_class(language, t['type'], t['title'], t.get('year'), t.get('imdb_id'),
t.get('season'), a, r)
logger.debug('Found subtitle %r', subtitle)
subtitles.append(subtitle)
return subtitles
def list_subtitles(self, video, languages):
season = episode = None
season = None
episodes = []
if isinstance(video, Episode):
title = video.series
titles = [video.series] + video.alternative_series
season = video.season
episode = video.episode
episodes = video.episodes
else:
title = video.title
titles = [video.title] + video.alternative_titles
return [s for l in languages for s in self.query(l, title, season=season, episode=episode, year=video.year)]
for title in titles:
subtitles = [s for l in languages for s in
self.query(l, title, season=season, episodes=episodes, year=video.year)]
if subtitles:
return subtitles
return []
def download_subtitle(self, subtitle):
# download archive in case we previously hit the releases cache and didn't download it
@ -446,3 +504,11 @@ class LegendasTVProvider(Provider):
# extract subtitle's content
subtitle.content = fix_line_ending(subtitle.archive.content.read(subtitle.name))
def raise_for_status(r):
    """Raise for a failed response, treating the maintenance page as a failure too.

    :param r: the response to check; its `text` and `raise_for_status()` are used.
    :raise: :class:`ServiceUnavailable` if the body contains the maintenance message.

    """
    # While the site is under maintenance it still answers with HTTP status code 200,
    # so the body has to be inspected before trusting the status code.
    under_maintenance = 'Em breve estaremos de volta' in r.text
    if under_maintenance:
        raise ServiceUnavailable

    r.raise_for_status()

View file

@ -5,7 +5,6 @@ from babelfish import Language
from requests import Session
from . import Provider
from .. import __short_version__
from ..subtitle import Subtitle
logger = logging.getLogger(__name__)
@ -42,11 +41,16 @@ class NapiProjektSubtitle(Subtitle):
def __init__(self, language, hash):
super(NapiProjektSubtitle, self).__init__(language)
self.hash = hash
self.content = None
@property
def id(self):
return self.hash
@property
def info(self):
return self.hash
def get_matches(self, video):
matches = set()
@ -62,10 +66,14 @@ class NapiProjektProvider(Provider):
languages = {Language.fromalpha2(l) for l in ['pl']}
required_hash = 'napiprojekt'
server_url = 'http://napiprojekt.pl/unit_napisy/dl.php'
subtitle_class = NapiProjektSubtitle
def __init__(self):
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__
self.session.headers['User-Agent'] = self.user_agent
def terminate(self):
self.session.close()
@ -81,16 +89,16 @@ class NapiProjektProvider(Provider):
'f': hash,
't': get_subhash(hash)}
logger.info('Searching subtitle %r', params)
response = self.session.get(self.server_url, params=params, timeout=10)
response.raise_for_status()
r = self.session.get(self.server_url, params=params, timeout=10)
r.raise_for_status()
# handle subtitles not found and errors
if response.content[:4] == b'NPc0':
if r.content[:4] == b'NPc0':
logger.debug('No subtitles found')
return None
subtitle = NapiProjektSubtitle(language, hash)
subtitle.content = response.content
subtitle = self.subtitle_class(language, hash)
subtitle.content = r.content
logger.debug('Found subtitle %r', subtitle)
return subtitle

View file

@ -11,9 +11,10 @@ from six.moves.xmlrpc_client import ServerProxy
from . import Provider, TimeoutSafeTransport
from .. import __short_version__
from ..exceptions import AuthenticationError, ConfigurationError, DownloadLimitExceeded, ProviderError
from ..subtitle import Subtitle, fix_line_ending, guess_matches
from ..utils import sanitize
from ..exceptions import (AuthenticationError, ConfigurationError, DownloadLimitExceeded, ProviderError,
ServiceUnavailable)
from ..matches import guess_matches
from ..subtitle import Subtitle, fix_line_ending
from ..video import Episode, Movie
logger = logging.getLogger(__name__)
@ -26,7 +27,8 @@ class OpenSubtitlesSubtitle(Subtitle):
def __init__(self, language, hearing_impaired, page_link, subtitle_id, matched_by, movie_kind, hash, movie_name,
movie_release_name, movie_year, movie_imdb_id, series_season, series_episode, filename, encoding):
super(OpenSubtitlesSubtitle, self).__init__(language, hearing_impaired, page_link, encoding)
super(OpenSubtitlesSubtitle, self).__init__(language, hearing_impaired=hearing_impaired,
page_link=page_link, encoding=encoding)
self.subtitle_id = subtitle_id
self.matched_by = matched_by
self.movie_kind = movie_kind
@ -43,6 +45,14 @@ class OpenSubtitlesSubtitle(Subtitle):
def id(self):
return str(self.subtitle_id)
@property
def info(self):
    """Most descriptive name available for this subtitle.

    The longer of the release name and the file name is returned; on a tie the file name wins.
    When neither is set, the subtitle id is returned instead.

    :return: the release name, the file name or the subtitle id.

    """
    # a missing name counts as empty so that comparing lengths never fails on `None`
    release_name = self.movie_release_name or ''
    filename = self.filename or ''

    if not release_name and not filename:
        return self.subtitle_id
    if len(release_name) > len(filename):
        return self.movie_release_name
    return self.filename
@property
def series_name(self):
return self.series_re.match(self.movie_name).group('series_name')
@ -52,60 +62,39 @@ class OpenSubtitlesSubtitle(Subtitle):
return self.series_re.match(self.movie_name).group('series_title')
def get_matches(self, video):
matches = set()
# episode
if isinstance(video, Episode) and self.movie_kind == 'episode':
# tag match, assume series, year, season and episode matches
if self.matched_by == 'tag':
matches |= {'series', 'year', 'season', 'episode'}
# series
if video.series and sanitize(self.series_name) == sanitize(video.series):
matches.add('series')
# year
if video.original_series and self.movie_year is None or video.year and video.year == self.movie_year:
matches.add('year')
# season
if video.season and self.series_season == video.season:
matches.add('season')
# episode
if video.episode and self.series_episode == video.episode:
matches.add('episode')
# title
if video.title and sanitize(self.series_title) == sanitize(video.title):
matches.add('title')
# guess
matches |= guess_matches(video, guessit(self.movie_release_name, {'type': 'episode'}))
matches |= guess_matches(video, guessit(self.filename, {'type': 'episode'}))
# hash
if 'opensubtitles' in video.hashes and self.hash == video.hashes['opensubtitles']:
if 'series' in matches and 'season' in matches and 'episode' in matches:
matches.add('hash')
else:
logger.debug('Match on hash discarded')
# movie
elif isinstance(video, Movie) and self.movie_kind == 'movie':
# tag match, assume title and year matches
if self.matched_by == 'tag':
matches |= {'title', 'year'}
# title
if video.title and sanitize(self.movie_name) == sanitize(video.title):
matches.add('title')
# year
if video.year and self.movie_year == video.year:
matches.add('year')
# guess
matches |= guess_matches(video, guessit(self.movie_release_name, {'type': 'movie'}))
matches |= guess_matches(video, guessit(self.filename, {'type': 'movie'}))
# hash
if 'opensubtitles' in video.hashes and self.hash == video.hashes['opensubtitles']:
if 'title' in matches:
matches.add('hash')
else:
logger.debug('Match on hash discarded')
else:
if (isinstance(video, Episode) and self.movie_kind != 'episode') or (
isinstance(video, Movie) and self.movie_kind != 'movie'):
logger.info('%r is not a valid movie_kind', self.movie_kind)
return matches
return set()
matches = guess_matches(video, {
'title': self.series_name if self.movie_kind == 'episode' else self.movie_name,
'episode_title': self.series_title if self.movie_kind == 'episode' else None,
'year': self.movie_year,
'season': self.series_season,
'episode': self.series_episode
})
# tag
if self.matched_by == 'tag':
if not video.imdb_id or self.movie_imdb_id == video.imdb_id:
if self.movie_kind == 'episode':
matches |= {'series', 'year', 'season', 'episode'}
elif self.movie_kind == 'movie':
matches |= {'title', 'year'}
# guess
matches |= guess_matches(video, guessit(self.movie_release_name, {'type': self.movie_kind}))
matches |= guess_matches(video, guessit(self.filename, {'type': self.movie_kind}))
# hash
if 'opensubtitles' in video.hashes and self.hash == video.hashes['opensubtitles']:
if self.movie_kind == 'movie' and 'title' in matches:
matches.add('hash')
elif self.movie_kind == 'episode' and 'series' in matches and 'season' in matches and 'episode' in matches:
matches.add('hash')
else:
logger.debug('Match on hash discarded')
# imdb_id
if video.imdb_id and self.movie_imdb_id == video.imdb_id:
@ -122,10 +111,13 @@ class OpenSubtitlesProvider(Provider):
"""
languages = {Language.fromopensubtitles(l) for l in language_converters['opensubtitles'].codes}
server_url = 'https://api.opensubtitles.org/xml-rpc'
subtitle_class = OpenSubtitlesSubtitle
user_agent = 'subliminal v%s' % __short_version__
def __init__(self, username=None, password=None):
self.server = ServerProxy('https://api.opensubtitles.org/xml-rpc', TimeoutSafeTransport(10))
if username and not password or not username and password:
self.server = ServerProxy(self.server_url, TimeoutSafeTransport(10))
if any((username, password)) and not all((username, password)):
raise ConfigurationError('Username and password must be specified')
# None values not allowed for logging in, so replace it by ''
self.username = username or ''
@ -134,8 +126,7 @@ class OpenSubtitlesProvider(Provider):
def initialize(self):
logger.info('Logging in')
response = checked(self.server.LogIn(self.username, self.password, 'eng',
'subliminal v%s' % __short_version__))
response = checked(self.server.LogIn(self.username, self.password, 'eng', self.user_agent))
self.token = response['token']
logger.debug('Logged in with token %r', self.token)
@ -156,7 +147,10 @@ class OpenSubtitlesProvider(Provider):
if hash and size:
criteria.append({'moviehash': hash, 'moviebytesize': str(size)})
if imdb_id:
criteria.append({'imdbid': imdb_id[2:]})
if season and episode:
criteria.append({'imdbid': imdb_id[2:], 'season': season, 'episode': episode})
else:
criteria.append({'imdbid': imdb_id[2:]})
if tag:
criteria.append({'tag': tag})
if query and season and episode:
@ -199,9 +193,9 @@ class OpenSubtitlesProvider(Provider):
filename = subtitle_item['SubFileName']
encoding = subtitle_item.get('SubEncoding') or None
subtitle = OpenSubtitlesSubtitle(language, hearing_impaired, page_link, subtitle_id, matched_by, movie_kind,
hash, movie_name, movie_release_name, movie_year, movie_imdb_id,
series_season, series_episode, filename, encoding)
subtitle = self.subtitle_class(language, hearing_impaired, page_link, subtitle_id, matched_by, movie_kind,
hash, movie_name, movie_release_name, movie_year, movie_imdb_id,
series_season, series_episode, filename, encoding)
logger.debug('Found subtitle %r by %s', subtitle, matched_by)
subtitles.append(subtitle)
@ -225,6 +219,17 @@ class OpenSubtitlesProvider(Provider):
subtitle.content = fix_line_ending(zlib.decompress(base64.b64decode(response['data'][0]['data']), 47))
class OpenSubtitlesVipSubtitle(OpenSubtitlesSubtitle):
"""OpenSubtitles VIP Subtitle."""
# same subtitle as the parent class, only registered under the VIP provider name
provider_name = 'opensubtitlesvip'
class OpenSubtitlesVipProvider(OpenSubtitlesProvider):
"""OpenSubtitles Provider using VIP url."""
# only the endpoint and the subtitle class are overridden here
server_url = 'https://vip-api.opensubtitles.org/xml-rpc'
subtitle_class = OpenSubtitlesVipSubtitle
class OpenSubtitlesError(ProviderError):
"""Base class for non-generic :class:`OpenSubtitlesProvider` exceptions."""
pass
@ -260,11 +265,6 @@ class DisabledUserAgent(OpenSubtitlesError, AuthenticationError):
pass
class ServiceUnavailable(OpenSubtitlesError):
"""Exception raised when status is '503 Service Unavailable'."""
pass
def checked(response):
"""Check a response status before returning it.

View file

@ -16,11 +16,10 @@ from requests import Session
from zipfile import ZipFile
from . import Provider
from .. import __short_version__
from ..exceptions import ProviderError
from ..subtitle import Subtitle, fix_line_ending, guess_matches
from ..utils import sanitize
from ..video import Episode, Movie
from ..matches import guess_matches
from ..subtitle import Subtitle, fix_line_ending
from ..video import Episode
logger = logging.getLogger(__name__)
@ -31,7 +30,7 @@ class PodnapisiSubtitle(Subtitle):
def __init__(self, language, hearing_impaired, page_link, pid, releases, title, season=None, episode=None,
year=None):
super(PodnapisiSubtitle, self).__init__(language, hearing_impaired, page_link)
super(PodnapisiSubtitle, self).__init__(language, hearing_impaired=hearing_impaired, page_link=page_link)
self.pid = pid
self.releases = releases
self.title = title
@ -43,37 +42,21 @@ class PodnapisiSubtitle(Subtitle):
def id(self):
return self.pid
def get_matches(self, video):
matches = set()
@property
def info(self):
return ' '.join(self.releases) or self.pid
# episode
if isinstance(video, Episode):
# series
if video.series and sanitize(self.title) == sanitize(video.series):
matches.add('series')
# year
if video.original_series and self.year is None or video.year and video.year == self.year:
matches.add('year')
# season
if video.season and self.season == video.season:
matches.add('season')
# episode
if video.episode and self.episode == video.episode:
matches.add('episode')
# guess
for release in self.releases:
matches |= guess_matches(video, guessit(release, {'type': 'episode'}))
# movie
elif isinstance(video, Movie):
# title
if video.title and sanitize(self.title) == sanitize(video.title):
matches.add('title')
# year
if video.year and self.year == video.year:
matches.add('year')
# guess
for release in self.releases:
matches |= guess_matches(video, guessit(release, {'type': 'movie'}))
def get_matches(self, video):
# Collect the matches between this subtitle and `video` and return them.
# first pass: compare the video against the attributes stored on this subtitle
matches = guess_matches(video, {
'title': self.title,
'year': self.year,
'season': self.season,
'episode': self.episode
})
# second pass: parse every release name with guessit, using the type of the video
# ('episode' for an Episode, 'movie' for anything else), and add those matches
video_type = 'episode' if isinstance(video, Episode) else 'movie'
for release in self.releases:
matches |= guess_matches(video, guessit(release, {'type': video_type}))
return matches
@ -82,11 +65,15 @@ class PodnapisiProvider(Provider):
"""Podnapisi Provider."""
languages = ({Language('por', 'BR'), Language('srp', script='Latn')} |
{Language.fromalpha2(l) for l in language_converters['alpha2'].codes})
server_url = 'http://podnapisi.net/subtitles/'
server_url = 'https://www.podnapisi.net/subtitles/'
subtitle_class = PodnapisiSubtitle
def __init__(self):
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__
self.session.headers['User-Agent'] = self.user_agent
def terminate(self):
self.session.close()
@ -108,7 +95,9 @@ class PodnapisiProvider(Provider):
pids = set()
while True:
# query the server
xml = etree.fromstring(self.session.get(self.server_url + 'search/old', params=params, timeout=10).content)
r = self.session.get(self.server_url + 'search/old', params=params, timeout=10)
r.raise_for_status()
xml = etree.fromstring(r.content)
# exit if no results
if not int(xml.find('pagination/results').text):
@ -118,10 +107,14 @@ class PodnapisiProvider(Provider):
# loop over subtitles
for subtitle_xml in xml.findall('subtitle'):
# read xml elements
pid = subtitle_xml.find('pid').text
# ignore duplicates, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164&start=10#p213321
if pid in pids:
continue
language = Language.fromietf(subtitle_xml.find('language').text)
hearing_impaired = 'n' in (subtitle_xml.find('flags').text or '')
page_link = subtitle_xml.find('url').text
pid = subtitle_xml.find('pid').text
releases = []
if subtitle_xml.find('release').text:
for release in subtitle_xml.find('release').text.split():
@ -134,15 +127,11 @@ class PodnapisiProvider(Provider):
year = int(subtitle_xml.find('year').text)
if is_episode:
subtitle = PodnapisiSubtitle(language, hearing_impaired, page_link, pid, releases, title,
season=season, episode=episode, year=year)
subtitle = self.subtitle_class(language, hearing_impaired, page_link, pid, releases, title,
season=season, episode=episode, year=year)
else:
subtitle = PodnapisiSubtitle(language, hearing_impaired, page_link, pid, releases, title,
year=year)
# ignore duplicates, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164&start=10#p213321
if pid in pids:
continue
subtitle = self.subtitle_class(language, hearing_impaired, page_link, pid, releases, title,
year=year)
logger.debug('Found subtitle %r', subtitle)
subtitles.append(subtitle)
@ -159,11 +148,21 @@ class PodnapisiProvider(Provider):
return subtitles
def list_subtitles(self, video, languages):
season = episode = None
if isinstance(video, Episode):
return [s for l in languages for s in self.query(l, video.series, season=video.season,
episode=video.episode, year=video.year)]
elif isinstance(video, Movie):
return [s for l in languages for s in self.query(l, video.title, year=video.year)]
titles = [video.series] + video.alternative_series
season = video.season
episode = video.episode
else:
titles = [video.title] + video.alternative_titles
for title in titles:
subtitles = [s for l in languages for s in
self.query(l, title, season=season, episode=episode, year=video.year)]
if subtitles:
return subtitles
return []
def download_subtitle(self, subtitle):
# download as a zip

View file

@ -7,7 +7,6 @@ from babelfish import Language, language_converters
from requests import Session
from . import Provider
from .. import __short_version__
from ..subtitle import Subtitle, fix_line_ending
logger = logging.getLogger(__name__)
@ -28,6 +27,10 @@ class ShooterSubtitle(Subtitle):
def id(self):
return self.download_link
@property
def info(self):
return self.hash
def get_matches(self, video):
matches = set()
@ -42,10 +45,14 @@ class ShooterProvider(Provider):
"""Shooter Provider."""
languages = {Language(l) for l in ['eng', 'zho']}
server_url = 'https://www.shooter.cn/api/subapi.php'
subtitle_class = ShooterSubtitle
def __init__(self):
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__
self.session.headers['User-Agent'] = self.user_agent
def terminate(self):
self.session.close()
@ -64,7 +71,7 @@ class ShooterProvider(Provider):
# parse the subtitles
results = json.loads(r.text)
subtitles = [ShooterSubtitle(language, hash, t['Link']) for s in results for t in s['Files']]
subtitles = [self.subtitle_class(language, hash, t['Link']) for s in results for t in s['Files']]
return subtitles

View file

@ -1,235 +0,0 @@
# -*- coding: utf-8 -*-
import bisect
from collections import defaultdict
import io
import json
import logging
import zipfile
from babelfish import Language
from guessit import guessit
from requests import Session
from . import ParserBeautifulSoup, Provider
from .. import __short_version__
from ..cache import SHOW_EXPIRATION_TIME, region
from ..exceptions import AuthenticationError, ConfigurationError, ProviderError
from ..subtitle import Subtitle, fix_line_ending, guess_matches
from ..utils import sanitize
from ..video import Episode, Movie
logger = logging.getLogger(__name__)
class SubsCenterSubtitle(Subtitle):
"""SubsCenter Subtitle."""
provider_name = 'subscenter'
def __init__(self, language, hearing_impaired, page_link, series, season, episode, title, subtitle_id, subtitle_key,
downloaded, releases):
super(SubsCenterSubtitle, self).__init__(language, hearing_impaired, page_link)
self.series = series
self.season = season
self.episode = episode
self.title = title
self.subtitle_id = subtitle_id
self.subtitle_key = subtitle_key
self.downloaded = downloaded
self.releases = releases
@property
def id(self):
return str(self.subtitle_id)
def get_matches(self, video):
matches = set()
# episode
if isinstance(video, Episode):
# series
if video.series and sanitize(self.series) == sanitize(video.series):
matches.add('series')
# season
if video.season and self.season == video.season:
matches.add('season')
# episode
if video.episode and self.episode == video.episode:
matches.add('episode')
# guess
for release in self.releases:
matches |= guess_matches(video, guessit(release, {'type': 'episode'}))
# movie
elif isinstance(video, Movie):
# guess
for release in self.releases:
matches |= guess_matches(video, guessit(release, {'type': 'movie'}))
# title
if video.title and sanitize(self.title) == sanitize(video.title):
matches.add('title')
return matches
class SubsCenterProvider(Provider):
"""SubsCenter Provider."""
languages = {Language.fromalpha2(l) for l in ['he']}
server_url = 'http://www.subscenter.co/he/'
def __init__(self, username=None, password=None):
if username is not None and password is None or username is None and password is not None:
raise ConfigurationError('Username and password must be specified')
self.session = None
self.username = username
self.password = password
self.logged_in = False
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/{}'.format(__short_version__)
# login
if self.username is not None and self.password is not None:
logger.debug('Logging in')
url = self.server_url + 'subscenter/accounts/login/'
# retrieve CSRF token
self.session.get(url)
csrf_token = self.session.cookies['csrftoken']
# actual login
data = {'username': self.username, 'password': self.password, 'csrfmiddlewaretoken': csrf_token}
r = self.session.post(url, data, allow_redirects=False, timeout=10)
if r.status_code != 302:
raise AuthenticationError(self.username)
logger.info('Logged in')
self.logged_in = True
def terminate(self):
# logout
if self.logged_in:
logger.info('Logging out')
r = self.session.get(self.server_url + 'subscenter/accounts/logout/', timeout=10)
r.raise_for_status()
logger.info('Logged out')
self.logged_in = False
self.session.close()
@region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
def _search_url_titles(self, title):
"""Search the URL titles by kind for the given `title`.
:param str title: title to search for.
:return: the URL titles by kind.
:rtype: collections.defaultdict
"""
# make the search
logger.info('Searching title name for %r', title)
r = self.session.get(self.server_url + 'subtitle/search/', params={'q': title}, timeout=10)
r.raise_for_status()
# check for redirections
if r.history and all([h.status_code == 302 for h in r.history]):
logger.debug('Redirected to the subtitles page')
links = [r.url]
else:
# get the suggestions (if needed)
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
links = [link.attrs['href'] for link in soup.select('#processes div.generalWindowTop a')]
logger.debug('Found %d suggestions', len(links))
url_titles = defaultdict(list)
for link in links:
parts = link.split('/')
url_titles[parts[-3]].append(parts[-2])
return url_titles
def query(self, title, season=None, episode=None):
# search for the url title
url_titles = self._search_url_titles(title)
# episode
if season and episode:
if 'series' not in url_titles:
logger.error('No URL title found for series %r', title)
return []
url_title = url_titles['series'][0]
logger.debug('Using series title %r', url_title)
url = self.server_url + 'cst/data/series/sb/{}/{}/{}/'.format(url_title, season, episode)
page_link = self.server_url + 'subtitle/series/{}/{}/{}/'.format(url_title, season, episode)
else:
if 'movie' not in url_titles:
logger.error('No URL title found for movie %r', title)
return []
url_title = url_titles['movie'][0]
logger.debug('Using movie title %r', url_title)
url = self.server_url + 'cst/data/movie/sb/{}/'.format(url_title)
page_link = self.server_url + 'subtitle/movie/{}/'.format(url_title)
# get the list of subtitles
logger.debug('Getting the list of subtitles')
r = self.session.get(url)
r.raise_for_status()
results = json.loads(r.text)
# loop over results
subtitles = {}
for language_code, language_data in results.items():
for quality_data in language_data.values():
for quality, subtitles_data in quality_data.items():
for subtitle_item in subtitles_data.values():
# read the item
language = Language.fromalpha2(language_code)
hearing_impaired = bool(subtitle_item['hearing_impaired'])
subtitle_id = subtitle_item['id']
subtitle_key = subtitle_item['key']
downloaded = subtitle_item['downloaded']
release = subtitle_item['subtitle_version']
# add the release and increment downloaded count if we already have the subtitle
if subtitle_id in subtitles:
logger.debug('Found additional release %r for subtitle %d', release, subtitle_id)
bisect.insort_left(subtitles[subtitle_id].releases, release) # deterministic order
subtitles[subtitle_id].downloaded += downloaded
continue
# otherwise create it
subtitle = SubsCenterSubtitle(language, hearing_impaired, page_link, title, season, episode,
title, subtitle_id, subtitle_key, downloaded, [release])
logger.debug('Found subtitle %r', subtitle)
subtitles[subtitle_id] = subtitle
return subtitles.values()
def list_subtitles(self, video, languages):
season = episode = None
title = video.title
if isinstance(video, Episode):
title = video.series
season = video.season
episode = video.episode
return [s for s in self.query(title, season, episode) if s.language in languages]
def download_subtitle(self, subtitle):
# download
url = self.server_url + 'subtitle/download/{}/{}/'.format(subtitle.language.alpha2, subtitle.subtitle_id)
params = {'v': subtitle.releases[0], 'key': subtitle.subtitle_key}
r = self.session.get(url, params=params, headers={'Referer': subtitle.page_link}, timeout=10)
r.raise_for_status()
# open the zip
with zipfile.ZipFile(io.BytesIO(r.content)) as zf:
# remove some filenames from the namelist
namelist = [n for n in zf.namelist() if not n.endswith('.txt')]
if len(namelist) > 1:
raise ProviderError('More than one file to unzip')
subtitle.content = fix_line_ending(zf.read(namelist[0]))

View file

@ -25,6 +25,10 @@ class TheSubDBSubtitle(Subtitle):
def id(self):
return self.hash + '-' + str(self.language)
@property
def info(self):
return self.hash
def get_matches(self, video):
matches = set()
@ -40,11 +44,15 @@ class TheSubDBProvider(Provider):
languages = {Language.fromthesubdb(l) for l in language_converters['thesubdb'].codes}
required_hash = 'thesubdb'
server_url = 'http://api.thesubdb.com/'
subtitle_class = TheSubDBSubtitle
user_agent = 'SubDB/1.0 (subliminal/%s; https://github.com/Diaoul/subliminal)' % __short_version__
def __init__(self):
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = ('SubDB/1.0 (subliminal/%s; https://github.com/Diaoul/subliminal)' %
__short_version__)
self.session.headers['User-Agent'] = self.user_agent
def terminate(self):
self.session.close()
@ -66,7 +74,7 @@ class TheSubDBProvider(Provider):
for language_code in r.text.split(','):
language = Language.fromthesubdb(language_code)
subtitle = TheSubDBSubtitle(language, hash)
subtitle = self.subtitle_class(language, hash)
logger.debug('Found subtitle %r', subtitle)
subtitles.append(subtitle)

View file

@ -9,12 +9,10 @@ from guessit import guessit
from requests import Session
from . import ParserBeautifulSoup, Provider
from .. import __short_version__
from ..cache import EPISODE_EXPIRATION_TIME, SHOW_EXPIRATION_TIME, region
from ..exceptions import ProviderError
from ..score import get_equivalent_release_groups
from ..subtitle import Subtitle, fix_line_ending, guess_matches
from ..utils import sanitize, sanitize_release_group
from ..matches import guess_matches
from ..subtitle import Subtitle, fix_line_ending
from ..video import Episode
logger = logging.getLogger(__name__)
@ -43,31 +41,24 @@ class TVsubtitlesSubtitle(Subtitle):
def id(self):
return str(self.subtitle_id)
def get_matches(self, video):
matches = set()
@property
def info(self):
return self.release or self.rip
def get_matches(self, video):
matches = guess_matches(video, {
'title': self.series,
'season': self.season,
'episode': self.episode,
'year': self.year,
'release_group': self.release
})
# series
if video.series and sanitize(self.series) == sanitize(video.series):
matches.add('series')
# season
if video.season and self.season == video.season:
matches.add('season')
# episode
if video.episode and self.episode == video.episode:
matches.add('episode')
# year
if video.original_series and self.year is None or video.year and video.year == self.year:
matches.add('year')
# release_group
if (video.release_group and self.release and
any(r in sanitize_release_group(self.release)
for r in get_equivalent_release_groups(sanitize_release_group(video.release_group)))):
matches.add('release_group')
# other properties
if self.release:
matches |= guess_matches(video, guessit(self.release, {'type': 'episode'}), partial=True)
if self.rip:
matches |= guess_matches(video, guessit(self.rip), partial=True)
matches |= guess_matches(video, guessit(self.rip, {'type': 'episode'}), partial=True)
return matches
@ -80,10 +71,14 @@ class TVsubtitlesProvider(Provider):
]}
video_types = (Episode,)
server_url = 'http://www.tvsubtitles.net/'
subtitle_class = TVsubtitlesSubtitle
def __init__(self):
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__
self.session.headers['User-Agent'] = self.user_agent
def terminate(self):
self.session.close()
@ -158,13 +153,7 @@ class TVsubtitlesProvider(Provider):
return episode_ids
def query(self, series, season, episode, year=None):
# search the show id
show_id = self.search_show_id(series, year)
if show_id is None:
logger.error('No show id found for %r (%r)', series, {'year': year})
return []
def query(self, show_id, series, season, episode, year=None):
# get the episode ids
episode_ids = self.get_episode_ids(show_id, season)
if episode not in episode_ids:
@ -184,9 +173,9 @@ class TVsubtitlesProvider(Provider):
subtitle_id = int(row.parent['href'][10:-5])
page_link = self.server_url + 'subtitle-%d.html' % subtitle_id
rip = row.find('p', title='rip').text.strip() or None
release = row.find('p', title='release').text.strip() or None
release = row.find('h5').text.strip() or None
subtitle = TVsubtitlesSubtitle(language, page_link, subtitle_id, series, season, episode, year, rip,
subtitle = self.subtitle_class(language, page_link, subtitle_id, series, season, episode, year, rip,
release)
logger.debug('Found subtitle %s', subtitle)
subtitles.append(subtitle)
@ -194,7 +183,24 @@ class TVsubtitlesProvider(Provider):
return subtitles
def list_subtitles(self, video, languages):
return [s for s in self.query(video.series, video.season, video.episode, video.year) if s.language in languages]
# lookup show_id
titles = [video.series] + video.alternative_series
show_id = None
for title in titles:
show_id = self.search_show_id(title, video.year)
if show_id is not None:
break
# query for subtitles with the show_id
if show_id is not None:
subtitles = [s for s in self.query(show_id, title, video.season, video.episode, video.year)
if s.language in languages and s.episode == video.episode]
if subtitles:
return subtitles
else:
logger.error('No show id found for %r (%r)', video.series, {'year': video.year})
return []
def download_subtitle(self, subtitle):
# download as a zip