mirror of
https://github.com/clinton-hall/nzbToMedia.git
synced 2025-07-14 01:02:55 -07:00
add subliminal for subtitle download. #253
This commit is contained in:
parent
47289c903a
commit
c3889c01b1
149 changed files with 34173 additions and 33 deletions
312
libs/pysrt/srtfile.py
Normal file
312
libs/pysrt/srtfile.py
Normal file
|
@ -0,0 +1,312 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import os
|
||||
import sys
|
||||
import codecs
|
||||
|
||||
try:
|
||||
from collections import UserList
|
||||
except ImportError:
|
||||
from UserList import UserList
|
||||
|
||||
from itertools import chain
|
||||
from copy import copy
|
||||
|
||||
from pysrt.srtexc import Error
|
||||
from pysrt.srtitem import SubRipItem
|
||||
from pysrt.compat import str
|
||||
|
||||
# Byte-order marks paired with the codec each one announces.  The 4-byte
# UTF-32 marks are listed before the UTF-16 ones because the UTF-16 LE mark
# is a prefix of the UTF-32 LE mark, so prefix matching has to try the
# longer marks first.
BOMS = (
    (codecs.BOM_UTF32_LE, 'utf_32_le'),
    (codecs.BOM_UTF32_BE, 'utf_32_be'),
    (codecs.BOM_UTF16_LE, 'utf_16_le'),
    (codecs.BOM_UTF16_BE, 'utf_16_be'),
    (codecs.BOM_UTF8, 'utf_8'),
)

# Codec name -> the BOM decoded with that very codec, i.e. the text that
# shows up at the start of a decoded stream when the file carries a BOM.
CODECS_BOMS = {name: str(mark, name) for mark, name in BOMS}

# Size in bytes of the longest known BOM: how much of a file has to be read
# to recognise any of the marks above.
BIGGER_BOM = max(len(mark) for mark, _name in BOMS)
|
||||
|
||||
|
||||
class SubRipFile(UserList, object):
    """
    SubRip file descriptor.

    Provide a pure Python mapping on all metadata.

    SubRipFile(items, eol, path, encoding)

    items -> list of SubRipItem. Default to [].
    eol -> str: end of line character. Default to linesep used in opened file
        if any else to os.linesep.
    path -> str: path where file will be saved. To open an existing file see
        SubRipFile.open.
    encoding -> str: encoding used at file save. Default to utf-8.
    """
    # Accepted values for the `error_handling` argument of open(), read(),
    # stream() and from_string(): ignore a parse error, report it on stderr,
    # or re-raise it.
    ERROR_PASS = 0
    ERROR_LOG = 1
    ERROR_RAISE = 2

    # Encoding assumed by _detect_encoding() when a file has no known BOM.
    DEFAULT_ENCODING = 'utf_8'

    def __init__(self, items=None, eol=None, path=None, encoding='utf-8'):
        UserList.__init__(self, items or [])
        self._eol = eol
        self.path = path
        self.encoding = encoding

    def _get_eol(self):
        """Return the stored end of line, or os.linesep when none is set."""
        return self._eol or os.linesep

    def _set_eol(self, eol):
        """Store `eol` only if no end of line has been set yet.

        The first non-empty value wins, so an eol passed to the constructor
        is not overridden by the one guessed later in read().
        """
        self._eol = self._eol or eol

    eol = property(_get_eol, _set_eol)

    def slice(self, starts_before=None, starts_after=None, ends_before=None,
              ends_after=None):
        """
        slice([starts_before][, starts_after][, ends_before][, ends_after])
        -> SubRipFile clone

        All arguments are optional, and should be coercible to SubRipTime
        object.

        It reduces the set of subtitles to those that match the given time
        constraints. A constraint whose value is falsy is ignored.

        The returned set is a clone, but still contains references to original
        subtitles. So if you shift this returned set, subs contained in the
        original SubRipFile instance will be altered too.

        Example:
            >>> subs.slice(ends_after={'seconds': 20}).shift(seconds=2)
        """
        clone = copy(self)

        # The filters are chained lazily and materialised once at the end.
        if starts_before:
            clone.data = (i for i in clone.data if i.start < starts_before)
        if starts_after:
            clone.data = (i for i in clone.data if i.start > starts_after)
        if ends_before:
            clone.data = (i for i in clone.data if i.end < ends_before)
        if ends_after:
            clone.data = (i for i in clone.data if i.end > ends_after)

        clone.data = list(clone.data)
        return clone

    def at(self, timestamp=None, **kwargs):
        """
        at(timestamp) -> SubRipFile clone

        timestamp argument should be coercible to SubRipTime object.

        A specialization of slice. Return all subtitles visible at the
        timestamp mark, i.e. those starting before it and ending after it.

        Example:
            >>> subs.at((0, 0, 20, 0)).shift(seconds=2)
            >>> subs.at(seconds=20).shift(seconds=2)
        """
        time = timestamp or kwargs
        return self.slice(starts_before=time, ends_after=time)

    def shift(self, *args, **kwargs):
        """shift(hours, minutes, seconds, milliseconds, ratio)

        Shift `start` and `end` attributes of each items of file either by
        applying a ratio or by adding an offset. All arguments are forwarded
        unchanged to the `shift` method of every item.

        `ratio` should be either an int or a float.
        Example to convert subtitles from 23.9 fps to 25 fps:
            >>> subs.shift(ratio=25/23.9)

        All "time" arguments are optional and have a default value of 0.
        Example to delay all subs from 2 seconds and half
            >>> subs.shift(seconds=2, milliseconds=500)
        """
        for item in self:
            item.shift(*args, **kwargs)

    def clean_indexes(self):
        """
        clean_indexes()

        Sort subs and reset their index attribute so that they are numbered
        from 1. Should be called after destructive operations like split or
        such.
        """
        self.sort()
        for index, item in enumerate(self):
            item.index = index + 1

    @property
    def text(self):
        """Text of every item, joined with a single newline."""
        return '\n'.join(i.text for i in self)

    @classmethod
    def open(cls, path='', encoding=None, error_handling=ERROR_PASS):
        """
        open([path, [encoding]]) -> SubRipFile

        If you do not provide any encoding, it is detected from the file's
        byte order mark when there is one; otherwise DEFAULT_ENCODING is
        used.

        `error_handling` -> one of ERROR_PASS, ERROR_LOG or ERROR_RAISE.
        """
        source_file, encoding = cls._open_unicode_file(
            path, claimed_encoding=encoding)
        # Close the file even when parsing raises (e.g. with ERROR_RAISE).
        try:
            new_file = cls(path=path, encoding=encoding)
            new_file.read(source_file, error_handling=error_handling)
        finally:
            source_file.close()
        return new_file

    @classmethod
    def from_string(cls, source, **kwargs):
        """
        from_string(source, **kwargs) -> SubRipFile

        `source` -> a unicode instance or at least a str instance encoded with
            `sys.getdefaultencoding()`

        `error_handling` may be given as a keyword; every other keyword is
        passed to the constructor.
        """
        # Same default as open(), read() and stream().
        error_handling = kwargs.pop('error_handling', cls.ERROR_PASS)
        new_file = cls(**kwargs)
        new_file.read(source.splitlines(True), error_handling=error_handling)
        return new_file

    def read(self, source_file, error_handling=ERROR_PASS):
        """
        read(source_file, [error_handling]) -> self

        This method parses subtitles contained in `source_file` and appends
        them to the current instance.

        `source_file` -> Any iterable that yield unicode strings, like a file
            opened with `codecs.open()` or an array of unicode.
        """
        self.eol = self._guess_eol(source_file)
        self.extend(self.stream(source_file, error_handling=error_handling))
        return self

    @classmethod
    def stream(cls, source_file, error_handling=ERROR_PASS):
        """
        stream(source_file, [error_handling])

        This method yields SubRipItem instances as soon as they have been
        parsed without storing them. It is a kind of SAX parser for .srt
        files.

        `source_file` -> Any iterable that yield unicode strings, like a file
            opened with `codecs.open()` or an array of unicode.

        Example:
            >>> import pysrt
            >>> import codecs
            >>> file = codecs.open('movie.srt', encoding='utf-8')
            >>> for sub in pysrt.stream(file):
            ...     sub.text += "\\nHello !"
            ...     print(sub)
        """
        string_buffer = []
        # The extra '\n' is a blank line that flushes the last buffered item.
        for index, line in enumerate(chain(source_file, '\n')):
            if line.strip():
                string_buffer.append(line)
            else:
                source = string_buffer
                string_buffer = []
                # Only non-blank lines are buffered, so a non-empty buffer
                # is a complete candidate item.
                if source:
                    try:
                        yield SubRipItem.from_lines(source)
                    except Error as error:
                        # Attach the offending text for _handle_error().
                        error.args += (''.join(source), )
                        cls._handle_error(error, error_handling, index)

    def save(self, path=None, encoding=None, eol=None):
        """
        save([path][, encoding][, eol])

        Use initial path if no other provided.
        Use initial encoding if no other provided.
        Use initial eol if no other provided.
        """
        path = path or self.path
        encoding = encoding or self.encoding

        # The context manager closes the file even if serialisation fails.
        with codecs.open(path, 'w+', encoding=encoding) as save_file:
            self.write_into(save_file, eol=eol)

    def write_into(self, output_file, eol=None):
        """
        write_into(output_file [, eol])

        Serialize current state into `output_file`.

        `output_file` -> Any instance that respond to `write()`, typically a
            file object
        """
        output_eol = eol or self.eol

        for item in self:
            string_repr = str(item)
            if output_eol != '\n':
                string_repr = string_repr.replace('\n', output_eol)
            output_file.write(string_repr)
            # Only add trailing eol if it's not already present.
            # It was kept in the SubRipItem's text before but it really
            # belongs here. Existing applications might give us subtitles
            # which already contain a trailing eol though.
            if not string_repr.endswith(2 * output_eol):
                output_file.write(output_eol)

    @classmethod
    def _guess_eol(cls, string_iterable):
        """Return the line ending of the first line, else os.linesep."""
        first_line = cls._get_first_line(string_iterable)
        # '\r\n' is tested first: such a line also ends with '\n'.
        for eol in ('\r\n', '\r', '\n'):
            if first_line.endswith(eol):
                return eol
        return os.linesep

    @classmethod
    def _get_first_line(cls, string_iterable):
        """Return the first line of `string_iterable`, or '' if it is empty.

        When the object offers both `tell` and `seek`, its position is
        restored afterwards so the line can be read again.
        """
        # A position can only be restored if it could be recorded first;
        # requiring both methods avoids seeking to an unknown position.
        can_rewind = (hasattr(string_iterable, 'tell') and
                      hasattr(string_iterable, 'seek'))
        if can_rewind:
            previous_position = string_iterable.tell()

        try:
            first_line = next(iter(string_iterable))
        except StopIteration:
            return ''
        if can_rewind:
            string_iterable.seek(previous_position)

        return first_line

    @classmethod
    def _detect_encoding(cls, path):
        """Return the codec named by the file's BOM, else DEFAULT_ENCODING."""
        with open(path, 'rb') as file_descriptor:
            first_chars = file_descriptor.read(BIGGER_BOM)

        for bom, encoding in BOMS:
            if first_chars.startswith(bom):
                return encoding

        # TODO: maybe a chardet integration
        return cls.DEFAULT_ENCODING

    @classmethod
    def _open_unicode_file(cls, path, claimed_encoding=None):
        """Open `path` for decoded reading, positioned after any BOM.

        Return a `(file object, encoding)` tuple; the encoding is
        `claimed_encoding` when given, else the detected one. The caller is
        responsible for closing the file.
        """
        encoding = claimed_encoding or cls._detect_encoding(path)
        # Plain 'r' instead of the legacy 'rU': codecs.open() always opens
        # the underlying file in binary mode, and the built-in open()
        # rejects the 'U' flag from Python 3.11 on.
        source_file = codecs.open(path, 'r', encoding=encoding)

        try:
            # get rid of BOM if any
            possible_bom = CODECS_BOMS.get(encoding, None)
            if possible_bom:
                file_bom = source_file.read(len(possible_bom))
                if file_bom != possible_bom:
                    source_file.seek(0)  # if not rewind
        except Exception:
            # Do not leak the handle when probing for the BOM fails.
            source_file.close()
            raise
        return source_file, encoding

    @classmethod
    def _handle_error(cls, error, error_handling, index):
        """Apply the `error_handling` policy to a parse `error`.

        ERROR_RAISE re-raises `error` with `index` prepended to its args.
        ERROR_LOG writes the error type name, `index` and an ASCII rendering
        of the first error argument to stderr. Any other value ignores the
        error.
        """
        if error_handling == cls.ERROR_RAISE:
            error.args = (index, ) + error.args
            raise error
        if error_handling == cls.ERROR_LOG:
            name = type(error).__name__
            detail = error.args[0] if error.args else ''
            sys.stderr.write('PySRT-%s(line %s): \n' % (name, index))
            # Round-trip through ASCII so that text, not bytes, is written;
            # a text-mode stderr rejects bytes on Python 3.
            sys.stderr.write(
                detail.encode('ascii', 'replace').decode('ascii'))
            sys.stderr.write('\n')
|
Loading…
Add table
Add a link
Reference in a new issue