mirror of
https://github.com/clinton-hall/nzbToMedia.git
synced 2025-08-20 21:33:13 -07:00
add subliminal for subtitle download. #253
This commit is contained in:
parent
47289c903a
commit
c3889c01b1
149 changed files with 34173 additions and 33 deletions
278
libs/subliminal/subtitle.py
Normal file
278
libs/subliminal/subtitle.py
Normal file
|
@ -0,0 +1,278 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
import logging
|
||||
import os.path
|
||||
import babelfish
|
||||
import chardet
|
||||
import guessit.matchtree
|
||||
import guessit.transfo
|
||||
import pysrt
|
||||
from .video import Episode, Movie
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Subtitle(object):
|
||||
"""Base class for subtitle
|
||||
|
||||
:param language: language of the subtitle
|
||||
:type language: :class:`babelfish.Language`
|
||||
:param bool hearing_impaired: `True` if the subtitle is hearing impaired, `False` otherwise
|
||||
:param page_link: link to the web page from which the subtitle can be downloaded, if any
|
||||
:type page_link: string or None
|
||||
|
||||
"""
|
||||
def __init__(self, language, hearing_impaired=False, page_link=None):
|
||||
self.language = language
|
||||
self.hearing_impaired = hearing_impaired
|
||||
self.page_link = page_link
|
||||
|
||||
#: Content as bytes
|
||||
self.content = None
|
||||
|
||||
#: Encoding to decode with when accessing :attr:`text`
|
||||
self.encoding = None
|
||||
|
||||
@property
|
||||
def guessed_encoding(self):
|
||||
"""Guessed encoding using the language, falling back on chardet"""
|
||||
# always try utf-8 first
|
||||
encodings = ['utf-8']
|
||||
|
||||
# add language-specific encodings
|
||||
if self.language.alpha3 == 'zho':
|
||||
encodings.extend(['gb18030', 'big5'])
|
||||
elif self.language.alpha3 == 'jpn':
|
||||
encodings.append('shift-jis')
|
||||
elif self.language.alpha3 == 'ara':
|
||||
encodings.append('windows-1256')
|
||||
elif self.language.alpha3 == 'heb':
|
||||
encodings.append('windows-1255')
|
||||
elif self.language.alpha3 == 'tur':
|
||||
encodings.extend(['iso-8859-9', 'windows-1254'])
|
||||
else:
|
||||
encodings.append('latin-1')
|
||||
|
||||
# try to decode
|
||||
for encoding in encodings:
|
||||
try:
|
||||
self.content.decode(encoding)
|
||||
return encoding
|
||||
except UnicodeDecodeError:
|
||||
pass
|
||||
|
||||
# fallback on chardet
|
||||
logger.warning('Could not decode content with encodings %r', encodings)
|
||||
return chardet.detect(self.content)['encoding']
|
||||
|
||||
@property
|
||||
def text(self):
|
||||
"""Content as string
|
||||
|
||||
If :attr:`encoding` is None, the encoding is guessed with :attr:`guessed_encoding`
|
||||
|
||||
"""
|
||||
if not self.content:
|
||||
return ''
|
||||
return self.content.decode(self.encoding or self.guessed_encoding, errors='replace')
|
||||
|
||||
@property
|
||||
def is_valid(self):
|
||||
"""Check if a subtitle text is a valid SubRip format"""
|
||||
try:
|
||||
pysrt.from_string(self.text, error_handling=pysrt.ERROR_RAISE)
|
||||
return True
|
||||
except pysrt.Error as e:
|
||||
if e.args[0] > 80:
|
||||
return True
|
||||
except:
|
||||
logger.exception('Unexpected error when validating subtitle')
|
||||
return False
|
||||
|
||||
def compute_matches(self, video):
|
||||
"""Compute the matches of the subtitle against the `video`
|
||||
|
||||
:param video: the video to compute the matches against
|
||||
:type video: :class:`~subliminal.video.Video`
|
||||
:return: matches of the subtitle
|
||||
:rtype: set
|
||||
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def compute_score(self, video):
|
||||
"""Compute the score of the subtitle against the `video`
|
||||
|
||||
There are equivalent matches so that a provider can match one element or its equivalent. This is
|
||||
to give all provider a chance to have a score in the same range without hurting quality.
|
||||
|
||||
* Matching :class:`~subliminal.video.Video`'s `hashes` is equivalent to matching everything else
|
||||
* Matching :class:`~subliminal.video.Episode`'s `season` and `episode`
|
||||
is equivalent to matching :class:`~subliminal.video.Episode`'s `title`
|
||||
* Matching :class:`~subliminal.video.Episode`'s `tvdb_id` is equivalent to matching
|
||||
:class:`~subliminal.video.Episode`'s `series`
|
||||
|
||||
:param video: the video to compute the score against
|
||||
:type video: :class:`~subliminal.video.Video`
|
||||
:return: score of the subtitle
|
||||
:rtype: int
|
||||
|
||||
"""
|
||||
score = 0
|
||||
# compute matches
|
||||
initial_matches = self.compute_matches(video)
|
||||
matches = initial_matches.copy()
|
||||
# hash is the perfect match
|
||||
if 'hash' in matches:
|
||||
score = video.scores['hash']
|
||||
else:
|
||||
# remove equivalences
|
||||
if isinstance(video, Episode):
|
||||
if 'imdb_id' in matches:
|
||||
matches -= {'series', 'tvdb_id', 'season', 'episode', 'title', 'year'}
|
||||
if 'tvdb_id' in matches:
|
||||
matches -= {'series', 'year'}
|
||||
if 'title' in matches:
|
||||
matches -= {'season', 'episode'}
|
||||
# add other scores
|
||||
score += sum((video.scores[match] for match in matches))
|
||||
logger.info('Computed score %d with matches %r', score, initial_matches)
|
||||
return score
|
||||
|
||||
def __repr__(self):
|
||||
return '<%s [%s]>' % (self.__class__.__name__, self.language)
|
||||
|
||||
|
||||
def get_subtitle_path(video_path, language=None):
|
||||
"""Create the subtitle path from the given `video_path` and `language`
|
||||
|
||||
:param string video_path: path to the video
|
||||
:param language: language of the subtitle to put in the path
|
||||
:type language: :class:`babelfish.Language` or None
|
||||
:return: path of the subtitle
|
||||
:rtype: string
|
||||
|
||||
"""
|
||||
subtitle_path = os.path.splitext(video_path)[0]
|
||||
if language is not None:
|
||||
try:
|
||||
return subtitle_path + '.%s.%s' % (language.alpha2, 'srt')
|
||||
except babelfish.LanguageConvertError:
|
||||
return subtitle_path + '.%s.%s' % (language.alpha3, 'srt')
|
||||
return subtitle_path + '.srt'
|
||||
|
||||
|
||||
def compute_guess_matches(video, guess):
|
||||
"""Compute matches between a `video` and a `guess`
|
||||
|
||||
:param video: the video to compute the matches on
|
||||
:type video: :class:`~subliminal.video.Video`
|
||||
:param guess: the guess to compute the matches on
|
||||
:type guess: :class:`guessit.Guess`
|
||||
:return: matches of the `guess`
|
||||
:rtype: set
|
||||
|
||||
"""
|
||||
matches = set()
|
||||
if isinstance(video, Episode):
|
||||
# series
|
||||
if video.series and 'series' in guess and guess['series'].lower() == video.series.lower():
|
||||
matches.add('series')
|
||||
# season
|
||||
if video.season and 'seasonNumber' in guess and guess['seasonNumber'] == video.season:
|
||||
matches.add('season')
|
||||
# episode
|
||||
if video.episode and 'episodeNumber' in guess and guess['episodeNumber'] == video.episode:
|
||||
matches.add('episode')
|
||||
# year
|
||||
if video.year == guess.get('year'): # count "no year" as an information
|
||||
matches.add('year')
|
||||
elif isinstance(video, Movie):
|
||||
# year
|
||||
if video.year and 'year' in guess and guess['year'] == video.year:
|
||||
matches.add('year')
|
||||
# title
|
||||
if video.title and 'title' in guess and guess['title'].lower() == video.title.lower():
|
||||
matches.add('title')
|
||||
# release group
|
||||
if video.release_group and 'releaseGroup' in guess and guess['releaseGroup'].lower() == video.release_group.lower():
|
||||
matches.add('release_group')
|
||||
# screen size
|
||||
if video.resolution and 'screenSize' in guess and guess['screenSize'] == video.resolution:
|
||||
matches.add('resolution')
|
||||
# format
|
||||
if video.format and 'format' in guess and guess['format'].lower() == video.format.lower():
|
||||
matches.add('format')
|
||||
# video codec
|
||||
if video.video_codec and 'videoCodec' in guess and guess['videoCodec'] == video.video_codec:
|
||||
matches.add('video_codec')
|
||||
# audio codec
|
||||
if video.audio_codec and 'audioCodec' in guess and guess['audioCodec'] == video.audio_codec:
|
||||
matches.add('audio_codec')
|
||||
return matches
|
||||
|
||||
|
||||
def compute_guess_properties_matches(video, string, propertytype):
|
||||
"""Compute matches between a `video` and properties of a certain property type
|
||||
|
||||
:param video: the video to compute the matches on
|
||||
:type video: :class:`~subliminal.video.Video`
|
||||
:param string string: the string to check for a certain property type
|
||||
:param string propertytype: the type of property to check (as defined in guessit)
|
||||
:return: matches of a certain property type (but will only be 1 match because we are checking for 1 property type)
|
||||
:rtype: set
|
||||
|
||||
Supported property types: result of guessit.transfo.guess_properties.GuessProperties().supported_properties()
|
||||
[u'audioProfile',
|
||||
u'videoCodec',
|
||||
u'container',
|
||||
u'format',
|
||||
u'episodeFormat',
|
||||
u'videoApi',
|
||||
u'screenSize',
|
||||
u'videoProfile',
|
||||
u'audioChannels',
|
||||
u'other',
|
||||
u'audioCodec']
|
||||
|
||||
"""
|
||||
matches = set()
|
||||
# We only check for the property types relevant for us
|
||||
if propertytype == 'screenSize' and video.resolution:
|
||||
for prop in guess_properties(string, propertytype):
|
||||
if prop.lower() == video.resolution.lower():
|
||||
matches.add('resolution')
|
||||
elif propertytype == 'format' and video.format:
|
||||
for prop in guess_properties(string, propertytype):
|
||||
if prop.lower() == video.format.lower():
|
||||
matches.add('format')
|
||||
elif propertytype == 'videoCodec' and video.video_codec:
|
||||
for prop in guess_properties(string, propertytype):
|
||||
if prop.lower() == video.video_codec.lower():
|
||||
matches.add('video_codec')
|
||||
elif propertytype == 'audioCodec' and video.audio_codec:
|
||||
for prop in guess_properties(string, propertytype):
|
||||
if prop.lower() == video.audio_codec.lower():
|
||||
matches.add('audio_codec')
|
||||
return matches
|
||||
|
||||
|
||||
def guess_properties(string, propertytype):
|
||||
properties = set()
|
||||
if string:
|
||||
tree = guessit.matchtree.MatchTree(string)
|
||||
guessit.transfo.guess_properties.GuessProperties().process(tree)
|
||||
properties = set(n.guess[propertytype] for n in tree.nodes() if propertytype in n.guess)
|
||||
return properties
|
||||
|
||||
|
||||
def fix_line_endings(content):
|
||||
"""Fix line ending of `content` by changing it to \n
|
||||
|
||||
:param bytes content: content of the subtitle
|
||||
:return: the content with fixed line endings
|
||||
:rtype: bytes
|
||||
|
||||
"""
|
||||
return content.replace(b'\r\n', b'\n').replace(b'\r', b'\n')
|
Loading…
Add table
Add a link
Reference in a new issue