add subliminal for subtitle download. #253

2025-07-14 01:02:55 -07:00 · 2014-06-27 17:04:44 +09:30 · 2014-06-27 17:04:44 +09:30 · c3889c01b1
commit c3889c01b1
parent 47289c903a
149 changed files with 34173 additions and 33 deletions
--- a/libs/pysrt/srtfile.py
+++ b/libs/pysrt/srtfile.py
@ -0,0 +1,312 @@
+# -*- coding: utf-8 -*-
+import os
+import sys
+import codecs
+
+try:
+    from collections import UserList
+except ImportError:
+    from UserList import UserList
+
+from itertools import chain
+from copy import copy
+
+from pysrt.srtexc import Error
+from pysrt.srtitem import SubRipItem
+from pysrt.compat import str
+
+# Known byte order marks, each paired with the codec it announces.
+# The UTF-32 marks are listed before the UTF-16 ones on purpose: the UTF-32 LE
+# mark begins with the very same bytes as the UTF-16 LE mark, so any
+# first-match prefix scan over this tuple has to meet the longer mark first.
+BOMS = (
+    (codecs.BOM_UTF32_LE, 'utf_32_le'),
+    (codecs.BOM_UTF32_BE, 'utf_32_be'),
+    (codecs.BOM_UTF16_LE, 'utf_16_le'),
+    (codecs.BOM_UTF16_BE, 'utf_16_be'),
+    (codecs.BOM_UTF8, 'utf_8'),
+)
+# Codec name -> that codec's BOM decoded to text with the codec itself.
+CODECS_BOMS = dict((name, str(mark, name)) for mark, name in BOMS)
+# Byte length of the longest mark in BOMS.
+BIGGER_BOM = max(len(mark) for mark, _name in BOMS)
+
+
+class SubRipFile(UserList, object):
+    """
+    SubRip file descriptor.
+
+    Provide a pure Python mapping on all metadata.
+
+    SubRipFile(items, eol, path, encoding)
+
+    items -> list of SubRipItem. Default to [].
+    eol -> str: end of line character. Default to linesep used in opened file
+        if any else to os.linesep.
+    path -> str: path where file will be saved. To open an existing file see
+        SubRipFile.open.
+    encoding -> str: encoding used at file save. Default to utf-8.
+    """
+    # Accepted values for the `error_handling` arguments below: silently skip
+    # an unparsable subtitle, report it on stderr, or re-raise the error.
+    ERROR_PASS = 0
+    ERROR_LOG = 1
+    ERROR_RAISE = 2
+
+    # Encoding assumed when a file carries no byte order mark.
+    DEFAULT_ENCODING = 'utf_8'
+
+    def __init__(self, items=None, eol=None, path=None, encoding='utf-8'):
+        UserList.__init__(self, items or [])
+        self._eol = eol
+        self.path = path
+        self.encoding = encoding
+
+    def _get_eol(self):
+        """Return the stored end of line, falling back to os.linesep."""
+        return self._eol or os.linesep
+
+    def _set_eol(self, eol):
+        """Store `eol` only if no end of line has been stored yet."""
+        # First value wins: an eol given to the constructor (or guessed by an
+        # earlier read) is not overridden by a later assignment.
+        self._eol = self._eol or eol
+
+    eol = property(_get_eol, _set_eol)
+
+    def slice(self, starts_before=None, starts_after=None, ends_before=None,
+              ends_after=None):
+        """
+        slice([starts_before][, starts_after][, ends_before][, ends_after])
+            -> SubRipFile clone
+
+        All arguments are optional, and should be coercible to SubRipTime
+        object.
+
+        It reduces the set of subtitles to those that match the given time
+        constraints. Every constraint is a strict comparison, and only
+        truthy arguments are applied.
+
+        The returned set is a clone, but still contains references to original
+        subtitles. So if you shift this returned set, subs contained in the
+        original SubRipFile instance will be altered too.
+
+        Example:
+            >>> subs.slice(ends_after={'seconds': 20}).shift(seconds=2)
+        """
+        clone = copy(self)
+
+        # Filters are stacked lazily as generators and evaluated once below.
+        if starts_before:
+            clone.data = (i for i in clone.data if i.start < starts_before)
+        if starts_after:
+            clone.data = (i for i in clone.data if i.start > starts_after)
+        if ends_before:
+            clone.data = (i for i in clone.data if i.end < ends_before)
+        if ends_after:
+            clone.data = (i for i in clone.data if i.end > ends_after)
+
+        clone.data = list(clone.data)
+        return clone
+
+    def at(self, timestamp=None, **kwargs):
+        """
+        at(timestamp) -> SubRipFile clone
+
+        timestamp argument should be coercible to SubRipTime object. It may
+        alternatively be given as keyword arguments, which are used when
+        `timestamp` is falsy.
+
+        A specialization of slice. Return all subtitles visible at the
+        timestamp mark.
+
+        Example:
+            >>> subs.at((0, 0, 20, 0)).shift(seconds=2)
+            >>> subs.at(seconds=20).shift(seconds=2)
+        """
+        time = timestamp or kwargs
+        return self.slice(starts_before=time, ends_after=time)
+
+    def shift(self, *args, **kwargs):
+        """shift(hours, minutes, seconds, milliseconds, ratio)
+
+        Shift `start` and `end` attributes of each items of file either by
+        applying a ratio or by adding an offset.
+
+        `ratio` should be either an int or a float.
+        Example to convert subtitles from 23.9 fps to 25 fps:
+        >>> subs.shift(ratio=25/23.9)
+
+        All "time" arguments are optional and have a default value of 0.
+        Example to delay all subs from 2 seconds and half
+        >>> subs.shift(seconds=2, milliseconds=500)
+        """
+        # All arguments are forwarded untouched to each item's own shift().
+        for item in self:
+            item.shift(*args, **kwargs)
+
+    def clean_indexes(self):
+        """
+        clean_indexes()
+
+        Sort subs and reset their index attribute. Should be called after
+        destructive operations like split or such.
+        """
+        self.sort()
+        # Indexes are renumbered starting from 1.
+        for index, item in enumerate(self):
+            item.index = index + 1
+
+    @property
+    def text(self):
+        """Text of every item, joined with a single newline."""
+        return '\n'.join(i.text for i in self)
+
+    @classmethod
+    def open(cls, path='', encoding=None, error_handling=ERROR_PASS):
+        """
+        open([path, [encoding]])
+
+        If you do not provide any encoding, it can be detected if the file
+        contain a bit order mark, unless it is set to utf-8 as default.
+
+        `error_handling` -> one of ERROR_PASS, ERROR_LOG or ERROR_RAISE.
+        """
+        source_file, encoding = cls._open_unicode_file(path, claimed_encoding=encoding)
+        try:
+            new_file = cls(path=path, encoding=encoding)
+            new_file.read(source_file, error_handling=error_handling)
+        finally:
+            # Release the handle even when parsing raises (ERROR_RAISE).
+            source_file.close()
+        return new_file
+
+    @classmethod
+    def from_string(cls, source, **kwargs):
+        """
+        from_string(source, **kwargs) -> SubRipFile
+
+        `source` -> a unicode instance or at least a str instance encoded with
+        `sys.getdefaultencoding()`
+
+        `error_handling` may be passed as a keyword; every other keyword is
+        forwarded to the constructor.
+        """
+        # Same default as open(), read() and stream().
+        error_handling = kwargs.pop('error_handling', cls.ERROR_PASS)
+        new_file = cls(**kwargs)
+        new_file.read(source.splitlines(True), error_handling=error_handling)
+        return new_file
+
+    def read(self, source_file, error_handling=ERROR_PASS):
+        """
+        read(source_file, [error_handling])
+
+        This method parse subtitles contained in `source_file` and append them
+        to the current instance.
+
+        `source_file` -> Any iterable that yield unicode strings, like a file
+            opened with `codecs.open()` or an array of unicode.
+
+        Return the instance itself.
+        """
+        self.eol = self._guess_eol(source_file)
+        self.extend(self.stream(source_file, error_handling=error_handling))
+        return self
+
+    @classmethod
+    def stream(cls, source_file, error_handling=ERROR_PASS):
+        """
+        stream(source_file, [error_handling])
+
+        This method yield SubRipItem instances as soon as they have been
+        parsed without storing them. It is a kind of SAX parser for .srt
+        files.
+
+        `source_file` -> Any iterable that yield unicode strings, like a file
+            opened with `codecs.open()` or an array of unicode.
+
+        Example:
+            >>> import pysrt
+            >>> import codecs
+            >>> file = codecs.open('movie.srt', encoding='utf-8')
+            >>> for sub in pysrt.stream(file):
+            ...     sub.text += "\nHello !"
+            ...     print unicode(sub)
+        """
+        string_buffer = []
+        # The extra '\n' acts as a final blank line so that the last buffered
+        # subtitle is flushed even when the source does not end with one.
+        for index, line in enumerate(chain(source_file, '\n')):
+            if line.strip():
+                string_buffer.append(line)
+            else:
+                source = string_buffer
+                string_buffer = []
+                if source and all(source):
+                    try:
+                        yield SubRipItem.from_lines(source)
+                    except Error as error:
+                        # Attach the offending text so handlers can show it.
+                        error.args += (''.join(source), )
+                        cls._handle_error(error, error_handling, index)
+
+    def save(self, path=None, encoding=None, eol=None):
+        """
+        save([path][, encoding][, eol])
+
+        Use initial path if no other provided.
+        Use initial encoding if no other provided.
+        Use initial eol if no other provided.
+        """
+        path = path or self.path
+        encoding = encoding or self.encoding
+
+        save_file = codecs.open(path, 'w+', encoding=encoding)
+        try:
+            self.write_into(save_file, eol=eol)
+        finally:
+            # Release the handle even when serialization raises.
+            save_file.close()
+
+    def write_into(self, output_file, eol=None):
+        """
+        write_into(output_file [, eol])
+
+        Serialize current state into `output_file`.
+
+        `output_file` -> Any instance that respond to `write()`, typically a
+        file object
+        """
+        output_eol = eol or self.eol
+
+        for item in self:
+            string_repr = str(item)
+            if output_eol != '\n':
+                string_repr = string_repr.replace('\n', output_eol)
+            output_file.write(string_repr)
+            # Only add trailing eol if it's not already present.
+            # It was kept in the SubRipItem's text before but it really
+            # belongs here. Existing applications might give us subtitles
+            # which already contain a trailing eol though.
+            if not string_repr.endswith(2 * output_eol):
+                output_file.write(output_eol)
+
+    @classmethod
+    def _guess_eol(cls, string_iterable):
+        """Return the line ending of the first line, else os.linesep."""
+        first_line = cls._get_first_line(string_iterable)
+        # '\r\n' must be tested before its one-character suffix '\n'.
+        for eol in ('\r\n', '\r', '\n'):
+            if first_line.endswith(eol):
+                return eol
+        return os.linesep
+
+    @classmethod
+    def _get_first_line(cls, string_iterable):
+        """
+        Return the first item yielded by `string_iterable`, or '' if empty.
+
+        When the iterable offers both `tell` and `seek`, its position is
+        restored afterwards so that the peeked line can be read again.
+        """
+        # Rewinding needs the starting position, hence both methods. An
+        # object exposing only `seek` is left where the peek moved it.
+        can_rewind = (hasattr(string_iterable, 'tell') and
+                      hasattr(string_iterable, 'seek'))
+        if can_rewind:
+            previous_position = string_iterable.tell()
+
+        try:
+            first_line = next(iter(string_iterable))
+        except StopIteration:
+            return ''
+        if can_rewind:
+            string_iterable.seek(previous_position)
+
+        return first_line
+
+    @classmethod
+    def _detect_encoding(cls, path):
+        """Return the codec announced by the file's BOM, else the default."""
+        with open(path, 'rb') as file_descriptor:
+            first_chars = file_descriptor.read(BIGGER_BOM)
+
+        for bom, encoding in BOMS:
+            if first_chars.startswith(bom):
+                return encoding
+
+        # TODO: maybe a chardet integration
+        return cls.DEFAULT_ENCODING
+
+    @classmethod
+    def _open_unicode_file(cls, path, claimed_encoding=None):
+        """
+        Open `path` for decoded reading, positioned after its BOM if any.
+
+        Return a `(file object, encoding)` tuple where encoding is
+        `claimed_encoding` when given, else the detected one.
+        """
+        encoding = claimed_encoding or cls._detect_encoding(path)
+        # The 'U' flag is rejected by Python 3.11+. codecs.open() opens the
+        # underlying file in binary mode, so on Python 3 the flag never had
+        # any effect; it is kept on Python 2 to leave behaviour there as is.
+        mode = 'rU' if sys.version_info[0] < 3 else 'r'
+        source_file = codecs.open(path, mode, encoding=encoding)
+
+        # get rid of BOM if any
+        possible_bom = CODECS_BOMS.get(encoding, None)
+        if possible_bom:
+            file_bom = source_file.read(len(possible_bom))
+            if not file_bom == possible_bom:
+                source_file.seek(0)  # if not rewind
+        return source_file, encoding
+
+    @classmethod
+    def _handle_error(cls, error, error_handling, index):
+        """
+        Apply the `error_handling` policy to a parsing `error`.
+
+        ERROR_RAISE -> re-raise `error` with `index` prepended to its args.
+        ERROR_LOG -> write the error name, `index` and first argument to
+            sys.stderr.
+        Any other value -> do nothing.
+        """
+        if error_handling == cls.ERROR_RAISE:
+            error.args = (index, ) + error.args
+            raise error
+        if error_handling == cls.ERROR_LOG:
+            name = type(error).__name__
+            # Non-ASCII characters are replaced so logging cannot itself fail
+            # on a limited stderr encoding.
+            message = error.args[0].encode('ascii', 'replace')
+            if sys.version_info[0] >= 3:
+                # encode() produced bytes, which a text stderr refuses.
+                message = message.decode('ascii')
+            sys.stderr.write('PySRT-%s(line %s): \n' % (name, index))
+            sys.stderr.write(message)
+            sys.stderr.write('\n')