mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2025-07-06 04:51:48 -07:00
Merge branch 'master' into strip
This commit is contained in:
commit
e8d433c359
21 changed files with 1109 additions and 493 deletions
13
.github/workflows/ci.yml
vendored
13
.github/workflows/ci.yml
vendored
|
@ -116,7 +116,7 @@ jobs:
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: true
|
fail-fast: true
|
||||||
matrix:
|
matrix:
|
||||||
os: [ubuntu-20.04]
|
os: [ubuntu-22.04]
|
||||||
python-version: ${{ fromJSON(needs.select.outputs.cpython-versions) }}
|
python-version: ${{ fromJSON(needs.select.outputs.cpython-versions) }}
|
||||||
python-impl: [cpython]
|
python-impl: [cpython]
|
||||||
ytdl-test-set: ${{ fromJSON(needs.select.outputs.test-set) }}
|
ytdl-test-set: ${{ fromJSON(needs.select.outputs.test-set) }}
|
||||||
|
@ -133,12 +133,12 @@ jobs:
|
||||||
ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download' || 'nodownload' }}
|
ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download' || 'nodownload' }}
|
||||||
run-tests-ext: bat
|
run-tests-ext: bat
|
||||||
# jython
|
# jython
|
||||||
- os: ubuntu-20.04
|
- os: ubuntu-22.04
|
||||||
python-version: 2.7
|
python-version: 2.7
|
||||||
python-impl: jython
|
python-impl: jython
|
||||||
ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }}
|
ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }}
|
||||||
run-tests-ext: sh
|
run-tests-ext: sh
|
||||||
- os: ubuntu-20.04
|
- os: ubuntu-22.04
|
||||||
python-version: 2.7
|
python-version: 2.7
|
||||||
python-impl: jython
|
python-impl: jython
|
||||||
ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download' || 'nodownload' }}
|
ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download' || 'nodownload' }}
|
||||||
|
@ -160,7 +160,7 @@ jobs:
|
||||||
# NB may run apt-get install in Linux
|
# NB may run apt-get install in Linux
|
||||||
uses: ytdl-org/setup-python@v1
|
uses: ytdl-org/setup-python@v1
|
||||||
env:
|
env:
|
||||||
# Temporary workaround for Python 3.5 failures - May 2024
|
# Temporary (?) workaround for Python 3.5 failures - May 2024
|
||||||
PIP_TRUSTED_HOST: "pypi.python.org pypi.org files.pythonhosted.org"
|
PIP_TRUSTED_HOST: "pypi.python.org pypi.org files.pythonhosted.org"
|
||||||
with:
|
with:
|
||||||
python-version: ${{ matrix.python-version }}
|
python-version: ${{ matrix.python-version }}
|
||||||
|
@ -240,7 +240,10 @@ jobs:
|
||||||
# install 2.7
|
# install 2.7
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get install -y python2 python-is-python2
|
# Ubuntu 22.04 no longer has python-is-python2: fetch it
|
||||||
|
curl -L "http://launchpadlibrarian.net/474693132/python-is-python2_2.7.17-4_all.deb" -o python-is-python2.deb
|
||||||
|
sudo apt-get install -y python2
|
||||||
|
sudo dpkg --force-breaks -i python-is-python2.deb
|
||||||
echo "PYTHONHOME=/usr" >> "$GITHUB_ENV"
|
echo "PYTHONHOME=/usr" >> "$GITHUB_ENV"
|
||||||
#-------- Python 2.6 --
|
#-------- Python 2.6 --
|
||||||
- name: Set up Python 2.6 environment
|
- name: Set up Python 2.6 environment
|
||||||
|
|
12
setup.py
12
setup.py
|
@ -33,8 +33,13 @@ py2exe_options = {
|
||||||
}
|
}
|
||||||
|
|
||||||
# Get the version from youtube_dl/version.py without importing the package
|
# Get the version from youtube_dl/version.py without importing the package
|
||||||
exec(compile(open('youtube_dl/version.py').read(),
|
exec(
|
||||||
'youtube_dl/version.py', 'exec'))
|
compile(
|
||||||
|
open('youtube_dl/version.py').read(),
|
||||||
|
'youtube_dl/version.py',
|
||||||
|
'exec',
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
DESCRIPTION = 'YouTube video downloader'
|
DESCRIPTION = 'YouTube video downloader'
|
||||||
LONG_DESCRIPTION = 'Command-line program to download videos from YouTube.com and other video sites'
|
LONG_DESCRIPTION = 'Command-line program to download videos from YouTube.com and other video sites'
|
||||||
|
@ -125,9 +130,6 @@ setup(
|
||||||
'Environment :: Console',
|
'Environment :: Console',
|
||||||
'License :: Public Domain',
|
'License :: Public Domain',
|
||||||
'Programming Language :: Python',
|
'Programming Language :: Python',
|
||||||
'Programming Language :: Python :: 2',
|
|
||||||
'Programming Language :: Python :: 2.6',
|
|
||||||
'Programming Language :: Python :: 2.7',
|
|
||||||
'Programming Language :: Python :: 3',
|
'Programming Language :: Python :: 3',
|
||||||
'Programming Language :: Python :: 3.2',
|
'Programming Language :: Python :: 3.2',
|
||||||
'Programming Language :: Python :: 3.3',
|
'Programming Language :: Python :: 3.3',
|
||||||
|
|
2
tox.ini
2
tox.ini
|
@ -1,5 +1,5 @@
|
||||||
[tox]
|
[tox]
|
||||||
envlist = py26,py27,py33,py34,py35
|
envlist = py33,py34,py35
|
||||||
[testenv]
|
[testenv]
|
||||||
deps =
|
deps =
|
||||||
nose
|
nose
|
||||||
|
|
|
@ -12,6 +12,7 @@ import io
|
||||||
import itertools
|
import itertools
|
||||||
import json
|
import json
|
||||||
import locale
|
import locale
|
||||||
|
import logging
|
||||||
import operator
|
import operator
|
||||||
import os
|
import os
|
||||||
import platform
|
import platform
|
||||||
|
@ -24,6 +25,8 @@ import time
|
||||||
import tokenize
|
import tokenize
|
||||||
import traceback
|
import traceback
|
||||||
import random
|
import random
|
||||||
|
from typing import Any
|
||||||
|
from typing import cast
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from ssl import OPENSSL_VERSION
|
from ssl import OPENSSL_VERSION
|
||||||
|
@ -130,6 +133,10 @@ from .version import __version__
|
||||||
if compat_os_name == 'nt':
|
if compat_os_name == 'nt':
|
||||||
import ctypes
|
import ctypes
|
||||||
|
|
||||||
|
logger = logging.getLogger('soundcloudutil.downloader')
|
||||||
|
|
||||||
|
TAGGED_LOG_MSG_REGEX = re.compile(r'^\[(?P<tag>\w+)(:(?P<subtag>\w+))?\]\s*(?P<msg>.+)$')
|
||||||
|
|
||||||
|
|
||||||
def _catch_unsafe_file_extension(func):
|
def _catch_unsafe_file_extension(func):
|
||||||
@functools.wraps(func)
|
@functools.wraps(func)
|
||||||
|
@ -494,27 +501,66 @@ class YoutubeDL(object):
|
||||||
"""Add the progress hook (currently only for the file downloader)"""
|
"""Add the progress hook (currently only for the file downloader)"""
|
||||||
self._progress_hooks.append(ph)
|
self._progress_hooks.append(ph)
|
||||||
|
|
||||||
def _write_string(self, s, out=None):
|
def _bidi_workaround(self, message):
|
||||||
|
if not hasattr(self, '_output_channel'):
|
||||||
|
return message
|
||||||
|
|
||||||
|
assert hasattr(self, '_output_process')
|
||||||
|
assert isinstance(message, compat_str)
|
||||||
|
line_count = message.count('\n') + 1
|
||||||
|
self._output_process.stdin.write((message + '\n').encode('utf-8'))
|
||||||
|
self._output_process.stdin.flush()
|
||||||
|
res = ''.join(self._output_channel.readline().decode('utf-8')
|
||||||
|
for _ in range(line_count))
|
||||||
|
return res[:-len('\n')]
|
||||||
|
|
||||||
|
def to_screen(self, message, skip_eol: bool = False):
|
||||||
|
"""Print message to stdout if not in quiet mode."""
|
||||||
|
return self.to_stdout(message, skip_eol, check_quiet=True)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def user_logger(self) -> logging.Logger | None:
|
||||||
|
return cast(logging.Logger | None, self.params.get('logger'))
|
||||||
|
|
||||||
|
def _write_string(self, s: str, out: io.TextIOWrapper | None = None) -> None:
|
||||||
write_string(s, out=out, encoding=self.params.get('encoding'))
|
write_string(s, out=out, encoding=self.params.get('encoding'))
|
||||||
|
|
||||||
def to_stdout(self, message, skip_eol=False, check_quiet=False):
|
def to_stdout(self, message, skip_eol: bool = False, check_quiet: bool = False):
|
||||||
"""Print message to stdout if not in quiet mode."""
|
"""Print message to stdout if not in quiet mode."""
|
||||||
if self.params.get('logger'):
|
quiet = check_quiet and self.params.get('quiet', False)
|
||||||
self.params['logger'].debug(message)
|
|
||||||
elif not check_quiet or not self.params.get('quiet', False):
|
|
||||||
terminator = ['\n', ''][skip_eol]
|
|
||||||
output = message + terminator
|
|
||||||
|
|
||||||
self._write_string(output, self._screen_file)
|
debug: bool
|
||||||
|
if message.startswith(f'[debug]'):
|
||||||
def to_stderr(self, message):
|
debug = True
|
||||||
"""Print message to stderr."""
|
message = message.removeprefix('[debug]').lstrip()
|
||||||
assert isinstance(message, compat_str)
|
elif message.startswith('[info]'):
|
||||||
if self.params.get('logger'):
|
debug = False
|
||||||
self.params['logger'].error(message)
|
message = message.removeprefix('[info]').lstrip()
|
||||||
|
elif quiet:
|
||||||
|
debug = True
|
||||||
else:
|
else:
|
||||||
output = message + '\n'
|
debug = False
|
||||||
self._write_string(output, self._err_file)
|
|
||||||
|
_logger = logger
|
||||||
|
if m := TAGGED_LOG_MSG_REGEX.match(message):
|
||||||
|
tag = m.group('tag')
|
||||||
|
subtag = m.group('subtag')
|
||||||
|
_logger_name = f'youtube_dl.{tag}'
|
||||||
|
if m.group('subtag'):
|
||||||
|
_logger_name += f'.{subtag}'
|
||||||
|
_logger = logging.getLogger(_logger_name)
|
||||||
|
message = m.group('msg')
|
||||||
|
|
||||||
|
if debug:
|
||||||
|
_logger.debug(message)
|
||||||
|
else:
|
||||||
|
_logger.info(message)
|
||||||
|
|
||||||
|
def to_stderr(self, message: str) -> None:
|
||||||
|
if self.user_logger is not None:
|
||||||
|
self.user_logger.error(message)
|
||||||
|
else:
|
||||||
|
logger.error(message)
|
||||||
|
|
||||||
def to_screen(self, message, skip_eol=False):
|
def to_screen(self, message, skip_eol=False):
|
||||||
"""Print message to stdout if not in quiet mode."""
|
"""Print message to stdout if not in quiet mode."""
|
||||||
|
@ -558,11 +604,8 @@ class YoutubeDL(object):
|
||||||
raise DownloadError(message, exc_info)
|
raise DownloadError(message, exc_info)
|
||||||
self._download_retcode = 1
|
self._download_retcode = 1
|
||||||
|
|
||||||
def report_warning(self, message, only_once=False, _cache={}):
|
def report_warning(self, message: str, only_once: bool = False, _cache: dict[int, int] | None = None) -> None:
|
||||||
'''
|
_cache = _cache or {}
|
||||||
Print the message to stderr, it will be prefixed with 'WARNING:'
|
|
||||||
If stderr is a tty file the 'WARNING:' will be colored
|
|
||||||
'''
|
|
||||||
if only_once:
|
if only_once:
|
||||||
m_hash = hash((self, message))
|
m_hash = hash((self, message))
|
||||||
m_cnt = _cache.setdefault(m_hash, 0)
|
m_cnt = _cache.setdefault(m_hash, 0)
|
||||||
|
@ -570,68 +613,28 @@ class YoutubeDL(object):
|
||||||
if m_cnt > 0:
|
if m_cnt > 0:
|
||||||
return
|
return
|
||||||
|
|
||||||
if self.params.get('logger') is not None:
|
if self.user_logger is not None:
|
||||||
self.params['logger'].warning(message)
|
self.user_logger.warning(message)
|
||||||
else:
|
else:
|
||||||
if self.params.get('no_warnings'):
|
if self.params.get('no_warnings'):
|
||||||
return
|
return
|
||||||
if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
|
logger.warning(message)
|
||||||
_msg_header = '\033[0;33mWARNING:\033[0m'
|
|
||||||
else:
|
|
||||||
_msg_header = 'WARNING:'
|
|
||||||
warning_message = '%s %s' % (_msg_header, message)
|
|
||||||
self.to_stderr(warning_message)
|
|
||||||
|
|
||||||
def report_error(self, message, *args, **kwargs):
|
# TODO: re-implement :meth:`trouble` to output tracebacks with RichHandler
|
||||||
'''
|
def report_error(self, message: str, *args: Any, **kwargs: Any) -> None:
|
||||||
Do the same as trouble, but prefixes the message with 'ERROR:', colored
|
logger.error(message)
|
||||||
in red if stderr is a tty file.
|
kwargs['message'] = f'ERROR: {message}'
|
||||||
'''
|
|
||||||
if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
|
|
||||||
_msg_header = '\033[0;31mERROR:\033[0m'
|
|
||||||
else:
|
|
||||||
_msg_header = 'ERROR:'
|
|
||||||
kwargs['message'] = '%s %s' % (_msg_header, message)
|
|
||||||
self.trouble(*args, **kwargs)
|
self.trouble(*args, **kwargs)
|
||||||
|
|
||||||
def to_console_title(self, message):
|
def write_debug(self, message, only_once=False):
|
||||||
if not self.params.get('consoletitle', False):
|
'''Log debug message or Print message to stderr'''
|
||||||
|
if not self.params.get('verbose', False):
|
||||||
return
|
return
|
||||||
if compat_os_name == 'nt':
|
message = '[debug] {0}'.format(message)
|
||||||
if ctypes.windll.kernel32.GetConsoleWindow():
|
if self.params.get('logger'):
|
||||||
# c_wchar_p() might not be necessary if `message` is
|
self.params['logger'].debug(message)
|
||||||
# already of type unicode()
|
else:
|
||||||
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
|
self.to_stderr(message, only_once)
|
||||||
elif 'TERM' in os.environ:
|
|
||||||
self._write_string('\033]0;%s\007' % message, self._screen_file)
|
|
||||||
|
|
||||||
def save_console_title(self):
|
|
||||||
if not self.params.get('consoletitle', False):
|
|
||||||
return
|
|
||||||
if self.params.get('simulate', False):
|
|
||||||
return
|
|
||||||
if compat_os_name != 'nt' and 'TERM' in os.environ:
|
|
||||||
# Save the title on stack
|
|
||||||
self._write_string('\033[22;0t', self._screen_file)
|
|
||||||
|
|
||||||
def restore_console_title(self):
|
|
||||||
if not self.params.get('consoletitle', False):
|
|
||||||
return
|
|
||||||
if self.params.get('simulate', False):
|
|
||||||
return
|
|
||||||
if compat_os_name != 'nt' and 'TERM' in os.environ:
|
|
||||||
# Restore the title from stack
|
|
||||||
self._write_string('\033[23;0t', self._screen_file)
|
|
||||||
|
|
||||||
def __enter__(self):
|
|
||||||
self.save_console_title()
|
|
||||||
return self
|
|
||||||
|
|
||||||
def __exit__(self, *args):
|
|
||||||
self.restore_console_title()
|
|
||||||
|
|
||||||
if self.params.get('cookiefile') is not None:
|
|
||||||
self.cookiejar.save(ignore_discard=True, ignore_expires=True)
|
|
||||||
|
|
||||||
def report_unscoped_cookies(self, *args, **kwargs):
|
def report_unscoped_cookies(self, *args, **kwargs):
|
||||||
# message=None, tb=False, is_error=False
|
# message=None, tb=False, is_error=False
|
||||||
|
@ -2470,7 +2473,7 @@ class YoutubeDL(object):
|
||||||
self.get_encoding()))
|
self.get_encoding()))
|
||||||
write_string(encoding_str, encoding=None)
|
write_string(encoding_str, encoding=None)
|
||||||
|
|
||||||
writeln_debug = lambda *s: self._write_string('[debug] %s\n' % (''.join(s), ))
|
writeln_debug = lambda *s: self.write_debug(''.join(s))
|
||||||
writeln_debug('youtube-dl version ', __version__)
|
writeln_debug('youtube-dl version ', __version__)
|
||||||
if _LAZY_LOADER:
|
if _LAZY_LOADER:
|
||||||
writeln_debug('Lazy loading extractors enabled')
|
writeln_debug('Lazy loading extractors enabled')
|
||||||
|
@ -2612,7 +2615,13 @@ class YoutubeDL(object):
|
||||||
encoding = preferredencoding()
|
encoding = preferredencoding()
|
||||||
return encoding
|
return encoding
|
||||||
|
|
||||||
def _write_info_json(self, label, info_dict, infofn, overwrite=None):
|
def _write_info_json(
|
||||||
|
self,
|
||||||
|
label: str,
|
||||||
|
info_dict: dict[str, Any],
|
||||||
|
infofn: str,
|
||||||
|
overwrite: bool | None = None,
|
||||||
|
) -> bool | str | None:
|
||||||
if not self.params.get('writeinfojson', False):
|
if not self.params.get('writeinfojson', False):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@ -2632,7 +2641,7 @@ class YoutubeDL(object):
|
||||||
return True
|
return True
|
||||||
except (OSError, IOError):
|
except (OSError, IOError):
|
||||||
self.report_error(msg('Cannot write %s to JSON file ', label) + infofn)
|
self.report_error(msg('Cannot write %s to JSON file ', label) + infofn)
|
||||||
return
|
return None
|
||||||
|
|
||||||
def _write_thumbnails(self, info_dict, filename):
|
def _write_thumbnails(self, info_dict, filename):
|
||||||
if self.params.get('writethumbnail', False):
|
if self.params.get('writethumbnail', False):
|
||||||
|
|
|
@ -18,7 +18,7 @@ from .compat import (
|
||||||
compat_getpass,
|
compat_getpass,
|
||||||
compat_register_utf8,
|
compat_register_utf8,
|
||||||
compat_shlex_split,
|
compat_shlex_split,
|
||||||
workaround_optparse_bug9161,
|
_workaround_optparse_bug9161,
|
||||||
)
|
)
|
||||||
from .utils import (
|
from .utils import (
|
||||||
_UnsafeExtensionError,
|
_UnsafeExtensionError,
|
||||||
|
@ -50,7 +50,7 @@ def _real_main(argv=None):
|
||||||
# Compatibility fix for Windows
|
# Compatibility fix for Windows
|
||||||
compat_register_utf8()
|
compat_register_utf8()
|
||||||
|
|
||||||
workaround_optparse_bug9161()
|
_workaround_optparse_bug9161()
|
||||||
|
|
||||||
setproctitle('youtube-dl')
|
setproctitle('youtube-dl')
|
||||||
|
|
||||||
|
@ -287,6 +287,10 @@ def _real_main(argv=None):
|
||||||
postprocessors.append({
|
postprocessors.append({
|
||||||
'key': 'FFmpegEmbedSubtitle',
|
'key': 'FFmpegEmbedSubtitle',
|
||||||
})
|
})
|
||||||
|
if opts.aacToMp3:
|
||||||
|
postprocessors.append({
|
||||||
|
'key': 'ConvertAACToMP3PP',
|
||||||
|
})
|
||||||
if opts.embedthumbnail:
|
if opts.embedthumbnail:
|
||||||
already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
|
already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
|
||||||
postprocessors.append({
|
postprocessors.append({
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import errno
|
import errno
|
||||||
|
@ -10,12 +11,14 @@ import traceback
|
||||||
from .compat import (
|
from .compat import (
|
||||||
compat_getenv,
|
compat_getenv,
|
||||||
compat_open as open,
|
compat_open as open,
|
||||||
|
compat_os_makedirs,
|
||||||
)
|
)
|
||||||
from .utils import (
|
from .utils import (
|
||||||
error_to_compat_str,
|
error_to_compat_str,
|
||||||
|
escape_rfc3986,
|
||||||
expand_path,
|
expand_path,
|
||||||
is_outdated_version,
|
is_outdated_version,
|
||||||
try_get,
|
traverse_obj,
|
||||||
write_json_file,
|
write_json_file,
|
||||||
)
|
)
|
||||||
from .version import __version__
|
from .version import __version__
|
||||||
|
@ -30,23 +33,35 @@ class Cache(object):
|
||||||
def __init__(self, ydl):
|
def __init__(self, ydl):
|
||||||
self._ydl = ydl
|
self._ydl = ydl
|
||||||
|
|
||||||
|
def _write_debug(self, *args, **kwargs):
|
||||||
|
self._ydl.write_debug(*args, **kwargs)
|
||||||
|
|
||||||
|
def _report_warning(self, *args, **kwargs):
|
||||||
|
self._ydl.report_warning(*args, **kwargs)
|
||||||
|
|
||||||
|
def _to_screen(self, *args, **kwargs):
|
||||||
|
self._ydl.to_screen(*args, **kwargs)
|
||||||
|
|
||||||
|
def _get_param(self, k, default=None):
|
||||||
|
return self._ydl.params.get(k, default)
|
||||||
|
|
||||||
def _get_root_dir(self):
|
def _get_root_dir(self):
|
||||||
res = self._ydl.params.get('cachedir')
|
res = self._get_param('cachedir')
|
||||||
if res is None:
|
if res is None:
|
||||||
cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache')
|
cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache')
|
||||||
res = os.path.join(cache_root, self._YTDL_DIR)
|
res = os.path.join(cache_root, self._YTDL_DIR)
|
||||||
return expand_path(res)
|
return expand_path(res)
|
||||||
|
|
||||||
def _get_cache_fn(self, section, key, dtype):
|
def _get_cache_fn(self, section, key, dtype):
|
||||||
assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \
|
assert re.match(r'^[\w.-]+$', section), \
|
||||||
'invalid section %r' % section
|
'invalid section %r' % section
|
||||||
assert re.match(r'^[a-zA-Z0-9_.-]+$', key), 'invalid key %r' % key
|
key = escape_rfc3986(key, safe='').replace('%', ',') # encode non-ascii characters
|
||||||
return os.path.join(
|
return os.path.join(
|
||||||
self._get_root_dir(), section, '%s.%s' % (key, dtype))
|
self._get_root_dir(), section, '%s.%s' % (key, dtype))
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def enabled(self):
|
def enabled(self):
|
||||||
return self._ydl.params.get('cachedir') is not False
|
return self._get_param('cachedir') is not False
|
||||||
|
|
||||||
def store(self, section, key, data, dtype='json'):
|
def store(self, section, key, data, dtype='json'):
|
||||||
assert dtype in ('json',)
|
assert dtype in ('json',)
|
||||||
|
@ -56,61 +71,75 @@ class Cache(object):
|
||||||
|
|
||||||
fn = self._get_cache_fn(section, key, dtype)
|
fn = self._get_cache_fn(section, key, dtype)
|
||||||
try:
|
try:
|
||||||
try:
|
compat_os_makedirs(os.path.dirname(fn), exist_ok=True)
|
||||||
os.makedirs(os.path.dirname(fn))
|
self._write_debug('Saving {section}.{key} to cache'.format(section=section, key=key))
|
||||||
except OSError as ose:
|
|
||||||
if ose.errno != errno.EEXIST:
|
|
||||||
raise
|
|
||||||
write_json_file({self._VERSION_KEY: __version__, 'data': data}, fn)
|
write_json_file({self._VERSION_KEY: __version__, 'data': data}, fn)
|
||||||
except Exception:
|
except Exception:
|
||||||
tb = traceback.format_exc()
|
tb = traceback.format_exc()
|
||||||
self._ydl.report_warning(
|
self._report_warning('Writing cache to {fn!r} failed: {tb}'.format(fn=fn, tb=tb))
|
||||||
'Writing cache to %r failed: %s' % (fn, tb))
|
|
||||||
|
def clear(self, section, key, dtype='json'):
|
||||||
|
|
||||||
|
if not self.enabled:
|
||||||
|
return
|
||||||
|
|
||||||
|
fn = self._get_cache_fn(section, key, dtype)
|
||||||
|
self._write_debug('Clearing {section}.{key} from cache'.format(section=section, key=key))
|
||||||
|
try:
|
||||||
|
os.remove(fn)
|
||||||
|
except Exception as e:
|
||||||
|
if getattr(e, 'errno') == errno.ENOENT:
|
||||||
|
# file not found
|
||||||
|
return
|
||||||
|
tb = traceback.format_exc()
|
||||||
|
self._report_warning('Clearing cache from {fn!r} failed: {tb}'.format(fn=fn, tb=tb))
|
||||||
|
|
||||||
def _validate(self, data, min_ver):
|
def _validate(self, data, min_ver):
|
||||||
version = try_get(data, lambda x: x[self._VERSION_KEY])
|
version = traverse_obj(data, self._VERSION_KEY)
|
||||||
if not version: # Backward compatibility
|
if not version: # Backward compatibility
|
||||||
data, version = {'data': data}, self._DEFAULT_VERSION
|
data, version = {'data': data}, self._DEFAULT_VERSION
|
||||||
if not is_outdated_version(version, min_ver or '0', assume_new=False):
|
if not is_outdated_version(version, min_ver or '0', assume_new=False):
|
||||||
return data['data']
|
return data['data']
|
||||||
self._ydl.to_screen(
|
self._write_debug('Discarding old cache from version {version} (needs {min_ver})'.format(version=version, min_ver=min_ver))
|
||||||
'Discarding old cache from version {version} (needs {min_ver})'.format(**locals()))
|
|
||||||
|
|
||||||
def load(self, section, key, dtype='json', default=None, min_ver=None):
|
def load(self, section, key, dtype='json', default=None, **kw_min_ver):
|
||||||
assert dtype in ('json',)
|
assert dtype in ('json',)
|
||||||
|
min_ver = kw_min_ver.get('min_ver')
|
||||||
|
|
||||||
if not self.enabled:
|
if not self.enabled:
|
||||||
return default
|
return default
|
||||||
|
|
||||||
cache_fn = self._get_cache_fn(section, key, dtype)
|
cache_fn = self._get_cache_fn(section, key, dtype)
|
||||||
try:
|
try:
|
||||||
|
with open(cache_fn, encoding='utf-8') as cachef:
|
||||||
|
self._write_debug('Loading {section}.{key} from cache'.format(section=section, key=key), only_once=True)
|
||||||
|
return self._validate(json.load(cachef), min_ver)
|
||||||
|
except (ValueError, KeyError):
|
||||||
try:
|
try:
|
||||||
with open(cache_fn, 'r', encoding='utf-8') as cachef:
|
file_size = 'size: %d' % os.path.getsize(cache_fn)
|
||||||
return self._validate(json.load(cachef), min_ver)
|
except (OSError, IOError) as oe:
|
||||||
except ValueError:
|
file_size = error_to_compat_str(oe)
|
||||||
try:
|
self._report_warning('Cache retrieval from %s failed (%s)' % (cache_fn, file_size))
|
||||||
file_size = os.path.getsize(cache_fn)
|
except Exception as e:
|
||||||
except (OSError, IOError) as oe:
|
if getattr(e, 'errno') == errno.ENOENT:
|
||||||
file_size = error_to_compat_str(oe)
|
# no cache available
|
||||||
self._ydl.report_warning(
|
return
|
||||||
'Cache retrieval from %s failed (%s)' % (cache_fn, file_size))
|
self._report_warning('Cache retrieval from %s failed' % (cache_fn,))
|
||||||
except IOError:
|
|
||||||
pass # No cache available
|
|
||||||
|
|
||||||
return default
|
return default
|
||||||
|
|
||||||
def remove(self):
|
def remove(self):
|
||||||
if not self.enabled:
|
if not self.enabled:
|
||||||
self._ydl.to_screen('Cache is disabled (Did you combine --no-cache-dir and --rm-cache-dir?)')
|
self._to_screen('Cache is disabled (Did you combine --no-cache-dir and --rm-cache-dir?)')
|
||||||
return
|
return
|
||||||
|
|
||||||
cachedir = self._get_root_dir()
|
cachedir = self._get_root_dir()
|
||||||
if not any((term in cachedir) for term in ('cache', 'tmp')):
|
if not any((term in cachedir) for term in ('cache', 'tmp')):
|
||||||
raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir)
|
raise Exception('Not removing directory %s - this does not look like a cache dir' % (cachedir,))
|
||||||
|
|
||||||
self._ydl.to_screen(
|
self._to_screen(
|
||||||
'Removing cache dir %s .' % cachedir, skip_eol=True)
|
'Removing cache dir %s .' % (cachedir,), skip_eol=True, ),
|
||||||
if os.path.exists(cachedir):
|
if os.path.exists(cachedir):
|
||||||
self._ydl.to_screen('.', skip_eol=True)
|
self._to_screen('.', skip_eol=True)
|
||||||
shutil.rmtree(cachedir)
|
shutil.rmtree(cachedir)
|
||||||
self._ydl.to_screen('.')
|
self._to_screen('.')
|
||||||
|
|
|
@ -16,7 +16,6 @@ import os
|
||||||
import platform
|
import platform
|
||||||
import re
|
import re
|
||||||
import shlex
|
import shlex
|
||||||
import shutil
|
|
||||||
import socket
|
import socket
|
||||||
import struct
|
import struct
|
||||||
import subprocess
|
import subprocess
|
||||||
|
@ -24,11 +23,15 @@ import sys
|
||||||
import types
|
import types
|
||||||
import xml.etree.ElementTree
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
|
_IDENTITY = lambda x: x
|
||||||
|
|
||||||
# naming convention
|
# naming convention
|
||||||
# 'compat_' + Python3_name.replace('.', '_')
|
# 'compat_' + Python3_name.replace('.', '_')
|
||||||
# other aliases exist for convenience and/or legacy
|
# other aliases exist for convenience and/or legacy
|
||||||
|
# wrap disposable test values in type() to reclaim storage
|
||||||
|
|
||||||
# deal with critical unicode/str things first
|
# deal with critical unicode/str things first:
|
||||||
|
# compat_str, compat_basestring, compat_chr
|
||||||
try:
|
try:
|
||||||
# Python 2
|
# Python 2
|
||||||
compat_str, compat_basestring, compat_chr = (
|
compat_str, compat_basestring, compat_chr = (
|
||||||
|
@ -39,18 +42,23 @@ except NameError:
|
||||||
str, (str, bytes), chr
|
str, (str, bytes), chr
|
||||||
)
|
)
|
||||||
|
|
||||||
# casefold
|
|
||||||
|
# compat_casefold
|
||||||
try:
|
try:
|
||||||
compat_str.casefold
|
compat_str.casefold
|
||||||
compat_casefold = lambda s: s.casefold()
|
compat_casefold = lambda s: s.casefold()
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
from .casefold import _casefold as compat_casefold
|
from .casefold import _casefold as compat_casefold
|
||||||
|
|
||||||
|
|
||||||
|
# compat_collections_abc
|
||||||
try:
|
try:
|
||||||
import collections.abc as compat_collections_abc
|
import collections.abc as compat_collections_abc
|
||||||
except ImportError:
|
except ImportError:
|
||||||
import collections as compat_collections_abc
|
import collections as compat_collections_abc
|
||||||
|
|
||||||
|
|
||||||
|
# compat_urllib_request
|
||||||
try:
|
try:
|
||||||
import urllib.request as compat_urllib_request
|
import urllib.request as compat_urllib_request
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
|
@ -79,11 +87,15 @@ except TypeError:
|
||||||
_add_init_method_arg(compat_urllib_request.Request)
|
_add_init_method_arg(compat_urllib_request.Request)
|
||||||
del _add_init_method_arg
|
del _add_init_method_arg
|
||||||
|
|
||||||
|
|
||||||
|
# compat_urllib_error
|
||||||
try:
|
try:
|
||||||
import urllib.error as compat_urllib_error
|
import urllib.error as compat_urllib_error
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
import urllib2 as compat_urllib_error
|
import urllib2 as compat_urllib_error
|
||||||
|
|
||||||
|
|
||||||
|
# compat_urllib_parse
|
||||||
try:
|
try:
|
||||||
import urllib.parse as compat_urllib_parse
|
import urllib.parse as compat_urllib_parse
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
|
@ -98,17 +110,23 @@ except ImportError: # Python 2
|
||||||
compat_urlparse = compat_urllib_parse
|
compat_urlparse = compat_urllib_parse
|
||||||
compat_urllib_parse_urlparse = compat_urllib_parse.urlparse
|
compat_urllib_parse_urlparse = compat_urllib_parse.urlparse
|
||||||
|
|
||||||
|
|
||||||
|
# compat_urllib_response
|
||||||
try:
|
try:
|
||||||
import urllib.response as compat_urllib_response
|
import urllib.response as compat_urllib_response
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
import urllib as compat_urllib_response
|
import urllib as compat_urllib_response
|
||||||
|
|
||||||
|
|
||||||
|
# compat_urllib_response.addinfourl
|
||||||
try:
|
try:
|
||||||
compat_urllib_response.addinfourl.status
|
compat_urllib_response.addinfourl.status
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
# .getcode() is deprecated in Py 3.
|
# .getcode() is deprecated in Py 3.
|
||||||
compat_urllib_response.addinfourl.status = property(lambda self: self.getcode())
|
compat_urllib_response.addinfourl.status = property(lambda self: self.getcode())
|
||||||
|
|
||||||
|
|
||||||
|
# compat_http_cookiejar
|
||||||
try:
|
try:
|
||||||
import http.cookiejar as compat_cookiejar
|
import http.cookiejar as compat_cookiejar
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
|
@ -127,12 +145,16 @@ else:
|
||||||
compat_cookiejar_Cookie = compat_cookiejar.Cookie
|
compat_cookiejar_Cookie = compat_cookiejar.Cookie
|
||||||
compat_http_cookiejar_Cookie = compat_cookiejar_Cookie
|
compat_http_cookiejar_Cookie = compat_cookiejar_Cookie
|
||||||
|
|
||||||
|
|
||||||
|
# compat_http_cookies
|
||||||
try:
|
try:
|
||||||
import http.cookies as compat_cookies
|
import http.cookies as compat_cookies
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
import Cookie as compat_cookies
|
import Cookie as compat_cookies
|
||||||
compat_http_cookies = compat_cookies
|
compat_http_cookies = compat_cookies
|
||||||
|
|
||||||
|
|
||||||
|
# compat_http_cookies_SimpleCookie
|
||||||
if sys.version_info[0] == 2 or sys.version_info < (3, 3):
|
if sys.version_info[0] == 2 or sys.version_info < (3, 3):
|
||||||
class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
|
class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
|
||||||
def load(self, rawdata):
|
def load(self, rawdata):
|
||||||
|
@ -155,11 +177,15 @@ else:
|
||||||
compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
|
compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
|
||||||
compat_http_cookies_SimpleCookie = compat_cookies_SimpleCookie
|
compat_http_cookies_SimpleCookie = compat_cookies_SimpleCookie
|
||||||
|
|
||||||
|
|
||||||
|
# compat_html_entities, probably useless now
|
||||||
try:
|
try:
|
||||||
import html.entities as compat_html_entities
|
import html.entities as compat_html_entities
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
import htmlentitydefs as compat_html_entities
|
import htmlentitydefs as compat_html_entities
|
||||||
|
|
||||||
|
|
||||||
|
# compat_html_entities_html5
|
||||||
try: # Python >= 3.3
|
try: # Python >= 3.3
|
||||||
compat_html_entities_html5 = compat_html_entities.html5
|
compat_html_entities_html5 = compat_html_entities.html5
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
|
@ -2408,18 +2434,24 @@ except AttributeError:
|
||||||
# Py < 3.1
|
# Py < 3.1
|
||||||
compat_http_client.HTTPResponse.getcode = lambda self: self.status
|
compat_http_client.HTTPResponse.getcode = lambda self: self.status
|
||||||
|
|
||||||
|
|
||||||
|
# compat_urllib_HTTPError
|
||||||
try:
|
try:
|
||||||
from urllib.error import HTTPError as compat_HTTPError
|
from urllib.error import HTTPError as compat_HTTPError
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
from urllib2 import HTTPError as compat_HTTPError
|
from urllib2 import HTTPError as compat_HTTPError
|
||||||
compat_urllib_HTTPError = compat_HTTPError
|
compat_urllib_HTTPError = compat_HTTPError
|
||||||
|
|
||||||
|
|
||||||
|
# compat_urllib_request_urlretrieve
|
||||||
try:
|
try:
|
||||||
from urllib.request import urlretrieve as compat_urlretrieve
|
from urllib.request import urlretrieve as compat_urlretrieve
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
from urllib import urlretrieve as compat_urlretrieve
|
from urllib import urlretrieve as compat_urlretrieve
|
||||||
compat_urllib_request_urlretrieve = compat_urlretrieve
|
compat_urllib_request_urlretrieve = compat_urlretrieve
|
||||||
|
|
||||||
|
|
||||||
|
# compat_html_parser_HTMLParser, compat_html_parser_HTMLParseError
|
||||||
try:
|
try:
|
||||||
from HTMLParser import (
|
from HTMLParser import (
|
||||||
HTMLParser as compat_HTMLParser,
|
HTMLParser as compat_HTMLParser,
|
||||||
|
@ -2432,22 +2464,33 @@ except ImportError: # Python 3
|
||||||
# HTMLParseError was deprecated in Python 3.3 and removed in
|
# HTMLParseError was deprecated in Python 3.3 and removed in
|
||||||
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible
|
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible
|
||||||
# and uniform cross-version exception handling
|
# and uniform cross-version exception handling
|
||||||
|
|
||||||
class compat_HTMLParseError(Exception):
|
class compat_HTMLParseError(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
compat_html_parser_HTMLParser = compat_HTMLParser
|
compat_html_parser_HTMLParser = compat_HTMLParser
|
||||||
compat_html_parser_HTMLParseError = compat_HTMLParseError
|
compat_html_parser_HTMLParseError = compat_HTMLParseError
|
||||||
|
|
||||||
|
|
||||||
|
# compat_subprocess_get_DEVNULL
|
||||||
try:
|
try:
|
||||||
_DEVNULL = subprocess.DEVNULL
|
_DEVNULL = subprocess.DEVNULL
|
||||||
compat_subprocess_get_DEVNULL = lambda: _DEVNULL
|
compat_subprocess_get_DEVNULL = lambda: _DEVNULL
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
|
compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
|
||||||
|
|
||||||
|
|
||||||
|
# compat_http_server
|
||||||
try:
|
try:
|
||||||
import http.server as compat_http_server
|
import http.server as compat_http_server
|
||||||
except ImportError:
|
except ImportError:
|
||||||
import BaseHTTPServer as compat_http_server
|
import BaseHTTPServer as compat_http_server
|
||||||
|
|
||||||
|
|
||||||
|
# compat_urllib_parse_unquote_to_bytes,
|
||||||
|
# compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus,
|
||||||
|
# compat_urllib_parse_urlencode,
|
||||||
|
# compat_urllib_parse_parse_qs
|
||||||
try:
|
try:
|
||||||
from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
|
from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
|
||||||
from urllib.parse import unquote as compat_urllib_parse_unquote
|
from urllib.parse import unquote as compat_urllib_parse_unquote
|
||||||
|
@ -2455,8 +2498,7 @@ try:
|
||||||
from urllib.parse import urlencode as compat_urllib_parse_urlencode
|
from urllib.parse import urlencode as compat_urllib_parse_urlencode
|
||||||
from urllib.parse import parse_qs as compat_parse_qs
|
from urllib.parse import parse_qs as compat_parse_qs
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
_asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
|
_asciire = getattr(compat_urllib_parse, '_asciire', None) or re.compile(r'([\x00-\x7f]+)')
|
||||||
else re.compile(r'([\x00-\x7f]+)'))
|
|
||||||
|
|
||||||
# HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
|
# HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
|
||||||
# implementations from cpython 3.4.3's stdlib. Python 2's version
|
# implementations from cpython 3.4.3's stdlib. Python 2's version
|
||||||
|
@ -2524,24 +2566,21 @@ except ImportError: # Python 2
|
||||||
# Possible solutions are to either port it from python 3 with all
|
# Possible solutions are to either port it from python 3 with all
|
||||||
# the friends or manually ensure input query contains only byte strings.
|
# the friends or manually ensure input query contains only byte strings.
|
||||||
# We will stick with latter thus recursively encoding the whole query.
|
# We will stick with latter thus recursively encoding the whole query.
|
||||||
def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'):
|
def compat_urllib_parse_urlencode(query, doseq=0, safe='', encoding='utf-8', errors='strict'):
|
||||||
|
|
||||||
def encode_elem(e):
|
def encode_elem(e):
|
||||||
if isinstance(e, dict):
|
if isinstance(e, dict):
|
||||||
e = encode_dict(e)
|
e = encode_dict(e)
|
||||||
elif isinstance(e, (list, tuple,)):
|
elif isinstance(e, (list, tuple,)):
|
||||||
list_e = encode_list(e)
|
e = type(e)(encode_elem(el) for el in e)
|
||||||
e = tuple(list_e) if isinstance(e, tuple) else list_e
|
|
||||||
elif isinstance(e, compat_str):
|
elif isinstance(e, compat_str):
|
||||||
e = e.encode(encoding)
|
e = e.encode(encoding, errors)
|
||||||
return e
|
return e
|
||||||
|
|
||||||
def encode_dict(d):
|
def encode_dict(d):
|
||||||
return dict((encode_elem(k), encode_elem(v)) for k, v in d.items())
|
return tuple((encode_elem(k), encode_elem(v)) for k, v in d.items())
|
||||||
|
|
||||||
def encode_list(l):
|
return compat_urllib_parse._urlencode(encode_elem(query), doseq=doseq).decode('ascii')
|
||||||
return [encode_elem(e) for e in l]
|
|
||||||
|
|
||||||
return compat_urllib_parse._urlencode(encode_elem(query), doseq=doseq)
|
|
||||||
|
|
||||||
# HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
|
# HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
|
||||||
# Python 2's version is apparently totally broken
|
# Python 2's version is apparently totally broken
|
||||||
|
@ -2596,8 +2635,61 @@ except ImportError: # Python 2
|
||||||
('parse_qs', compat_parse_qs)):
|
('parse_qs', compat_parse_qs)):
|
||||||
setattr(compat_urllib_parse, name, fix)
|
setattr(compat_urllib_parse, name, fix)
|
||||||
|
|
||||||
|
try:
|
||||||
|
all(chr(i) in b'' for i in range(256))
|
||||||
|
except TypeError:
|
||||||
|
# not all chr(i) are str: patch Python2 quote
|
||||||
|
|
||||||
|
_safemaps = getattr(compat_urllib_parse, '_safemaps', {})
|
||||||
|
_always_safe = frozenset(compat_urllib_parse.always_safe)
|
||||||
|
|
||||||
|
def _quote(s, safe='/'):
|
||||||
|
"""quote('abc def') -> 'abc%20def'"""
|
||||||
|
|
||||||
|
if not s and s is not None: # fast path
|
||||||
|
return s
|
||||||
|
safe = frozenset(safe)
|
||||||
|
cachekey = (safe, _always_safe)
|
||||||
|
try:
|
||||||
|
safe_map = _safemaps[cachekey]
|
||||||
|
except KeyError:
|
||||||
|
safe = _always_safe | safe
|
||||||
|
safe_map = {}
|
||||||
|
for i in range(256):
|
||||||
|
c = chr(i)
|
||||||
|
safe_map[c] = (
|
||||||
|
c if (i < 128 and c in safe)
|
||||||
|
else b'%{0:02X}'.format(i))
|
||||||
|
_safemaps[cachekey] = safe_map
|
||||||
|
|
||||||
|
if safe.issuperset(s):
|
||||||
|
return s
|
||||||
|
return ''.join(safe_map[c] for c in s)
|
||||||
|
|
||||||
|
# linked code
|
||||||
|
def _quote_plus(s, safe=''):
|
||||||
|
return (
|
||||||
|
_quote(s, safe + b' ').replace(b' ', b'+') if b' ' in s
|
||||||
|
else _quote(s, safe))
|
||||||
|
|
||||||
|
# linked code
|
||||||
|
def _urlcleanup():
|
||||||
|
if compat_urllib_parse._urlopener:
|
||||||
|
compat_urllib_parse._urlopener.cleanup()
|
||||||
|
_safemaps.clear()
|
||||||
|
compat_urllib_parse.ftpcache.clear()
|
||||||
|
|
||||||
|
for name, fix in (
|
||||||
|
('quote', _quote),
|
||||||
|
('quote_plus', _quote_plus),
|
||||||
|
('urlcleanup', _urlcleanup)):
|
||||||
|
setattr(compat_urllib_parse, '_' + name, getattr(compat_urllib_parse, name))
|
||||||
|
setattr(compat_urllib_parse, name, fix)
|
||||||
|
|
||||||
compat_urllib_parse_parse_qs = compat_parse_qs
|
compat_urllib_parse_parse_qs = compat_parse_qs
|
||||||
|
|
||||||
|
|
||||||
|
# compat_urllib_request_DataHandler
|
||||||
try:
|
try:
|
||||||
from urllib.request import DataHandler as compat_urllib_request_DataHandler
|
from urllib.request import DataHandler as compat_urllib_request_DataHandler
|
||||||
except ImportError: # Python < 3.4
|
except ImportError: # Python < 3.4
|
||||||
|
@ -2632,16 +2724,20 @@ except ImportError: # Python < 3.4
|
||||||
|
|
||||||
return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
|
return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
|
||||||
|
|
||||||
|
|
||||||
|
# compat_xml_etree_ElementTree_ParseError
|
||||||
try:
|
try:
|
||||||
from xml.etree.ElementTree import ParseError as compat_xml_parse_error
|
from xml.etree.ElementTree import ParseError as compat_xml_parse_error
|
||||||
except ImportError: # Python 2.6
|
except ImportError: # Python 2.6
|
||||||
from xml.parsers.expat import ExpatError as compat_xml_parse_error
|
from xml.parsers.expat import ExpatError as compat_xml_parse_error
|
||||||
compat_xml_etree_ElementTree_ParseError = compat_xml_parse_error
|
compat_xml_etree_ElementTree_ParseError = compat_xml_parse_error
|
||||||
|
|
||||||
etree = xml.etree.ElementTree
|
|
||||||
|
# compat_xml_etree_ElementTree_Element
|
||||||
|
_etree = xml.etree.ElementTree
|
||||||
|
|
||||||
|
|
||||||
class _TreeBuilder(etree.TreeBuilder):
|
class _TreeBuilder(_etree.TreeBuilder):
|
||||||
def doctype(self, name, pubid, system):
|
def doctype(self, name, pubid, system):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -2650,7 +2746,7 @@ try:
|
||||||
# xml.etree.ElementTree.Element is a method in Python <=2.6 and
|
# xml.etree.ElementTree.Element is a method in Python <=2.6 and
|
||||||
# the following will crash with:
|
# the following will crash with:
|
||||||
# TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
|
# TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
|
||||||
isinstance(None, etree.Element)
|
isinstance(None, _etree.Element)
|
||||||
from xml.etree.ElementTree import Element as compat_etree_Element
|
from xml.etree.ElementTree import Element as compat_etree_Element
|
||||||
except TypeError: # Python <=2.6
|
except TypeError: # Python <=2.6
|
||||||
from xml.etree.ElementTree import _ElementInterface as compat_etree_Element
|
from xml.etree.ElementTree import _ElementInterface as compat_etree_Element
|
||||||
|
@ -2658,12 +2754,12 @@ compat_xml_etree_ElementTree_Element = compat_etree_Element
|
||||||
|
|
||||||
if sys.version_info[0] >= 3:
|
if sys.version_info[0] >= 3:
|
||||||
def compat_etree_fromstring(text):
|
def compat_etree_fromstring(text):
|
||||||
return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
|
return _etree.XML(text, parser=_etree.XMLParser(target=_TreeBuilder()))
|
||||||
else:
|
else:
|
||||||
# python 2.x tries to encode unicode strings with ascii (see the
|
# python 2.x tries to encode unicode strings with ascii (see the
|
||||||
# XMLParser._fixtext method)
|
# XMLParser._fixtext method)
|
||||||
try:
|
try:
|
||||||
_etree_iter = etree.Element.iter
|
_etree_iter = _etree.Element.iter
|
||||||
except AttributeError: # Python <=2.6
|
except AttributeError: # Python <=2.6
|
||||||
def _etree_iter(root):
|
def _etree_iter(root):
|
||||||
for el in root.findall('*'):
|
for el in root.findall('*'):
|
||||||
|
@ -2675,27 +2771,29 @@ else:
|
||||||
# 2.7 source
|
# 2.7 source
|
||||||
def _XML(text, parser=None):
|
def _XML(text, parser=None):
|
||||||
if not parser:
|
if not parser:
|
||||||
parser = etree.XMLParser(target=_TreeBuilder())
|
parser = _etree.XMLParser(target=_TreeBuilder())
|
||||||
parser.feed(text)
|
parser.feed(text)
|
||||||
return parser.close()
|
return parser.close()
|
||||||
|
|
||||||
def _element_factory(*args, **kwargs):
|
def _element_factory(*args, **kwargs):
|
||||||
el = etree.Element(*args, **kwargs)
|
el = _etree.Element(*args, **kwargs)
|
||||||
for k, v in el.items():
|
for k, v in el.items():
|
||||||
if isinstance(v, bytes):
|
if isinstance(v, bytes):
|
||||||
el.set(k, v.decode('utf-8'))
|
el.set(k, v.decode('utf-8'))
|
||||||
return el
|
return el
|
||||||
|
|
||||||
def compat_etree_fromstring(text):
|
def compat_etree_fromstring(text):
|
||||||
doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
|
doc = _XML(text, parser=_etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
|
||||||
for el in _etree_iter(doc):
|
for el in _etree_iter(doc):
|
||||||
if el.text is not None and isinstance(el.text, bytes):
|
if el.text is not None and isinstance(el.text, bytes):
|
||||||
el.text = el.text.decode('utf-8')
|
el.text = el.text.decode('utf-8')
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
if hasattr(etree, 'register_namespace'):
|
|
||||||
compat_etree_register_namespace = etree.register_namespace
|
# compat_xml_etree_register_namespace
|
||||||
else:
|
try:
|
||||||
|
compat_etree_register_namespace = _etree.register_namespace
|
||||||
|
except AttributeError:
|
||||||
def compat_etree_register_namespace(prefix, uri):
|
def compat_etree_register_namespace(prefix, uri):
|
||||||
"""Register a namespace prefix.
|
"""Register a namespace prefix.
|
||||||
The registry is global, and any existing mapping for either the
|
The registry is global, and any existing mapping for either the
|
||||||
|
@ -2704,14 +2802,16 @@ else:
|
||||||
attributes in this namespace will be serialized with prefix if possible.
|
attributes in this namespace will be serialized with prefix if possible.
|
||||||
ValueError is raised if prefix is reserved or is invalid.
|
ValueError is raised if prefix is reserved or is invalid.
|
||||||
"""
|
"""
|
||||||
if re.match(r"ns\d+$", prefix):
|
if re.match(r'ns\d+$', prefix):
|
||||||
raise ValueError("Prefix format reserved for internal use")
|
raise ValueError('Prefix format reserved for internal use')
|
||||||
for k, v in list(etree._namespace_map.items()):
|
for k, v in list(_etree._namespace_map.items()):
|
||||||
if k == uri or v == prefix:
|
if k == uri or v == prefix:
|
||||||
del etree._namespace_map[k]
|
del _etree._namespace_map[k]
|
||||||
etree._namespace_map[uri] = prefix
|
_etree._namespace_map[uri] = prefix
|
||||||
compat_xml_etree_register_namespace = compat_etree_register_namespace
|
compat_xml_etree_register_namespace = compat_etree_register_namespace
|
||||||
|
|
||||||
|
|
||||||
|
# compat_xpath, compat_etree_iterfind
|
||||||
if sys.version_info < (2, 7):
|
if sys.version_info < (2, 7):
|
||||||
# Here comes the crazy part: In 2.6, if the xpath is a unicode,
|
# Here comes the crazy part: In 2.6, if the xpath is a unicode,
|
||||||
# .//node does not match if a node is a direct child of . !
|
# .//node does not match if a node is a direct child of . !
|
||||||
|
@ -2898,7 +2998,6 @@ if sys.version_info < (2, 7):
|
||||||
def __init__(self, root):
|
def __init__(self, root):
|
||||||
self.root = root
|
self.root = root
|
||||||
|
|
||||||
##
|
|
||||||
# Generate all matching objects.
|
# Generate all matching objects.
|
||||||
|
|
||||||
def compat_etree_iterfind(elem, path, namespaces=None):
|
def compat_etree_iterfind(elem, path, namespaces=None):
|
||||||
|
@ -2933,13 +3032,15 @@ if sys.version_info < (2, 7):
|
||||||
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
compat_xpath = lambda xpath: xpath
|
|
||||||
compat_etree_iterfind = lambda element, match: element.iterfind(match)
|
compat_etree_iterfind = lambda element, match: element.iterfind(match)
|
||||||
|
compat_xpath = _IDENTITY
|
||||||
|
|
||||||
|
|
||||||
|
# compat_os_name
|
||||||
compat_os_name = os._name if os.name == 'java' else os.name
|
compat_os_name = os._name if os.name == 'java' else os.name
|
||||||
|
|
||||||
|
|
||||||
|
# compat_shlex_quote
|
||||||
if compat_os_name == 'nt':
|
if compat_os_name == 'nt':
|
||||||
def compat_shlex_quote(s):
|
def compat_shlex_quote(s):
|
||||||
return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
|
return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
|
||||||
|
@ -2954,6 +3055,7 @@ else:
|
||||||
return "'" + s.replace("'", "'\"'\"'") + "'"
|
return "'" + s.replace("'", "'\"'\"'") + "'"
|
||||||
|
|
||||||
|
|
||||||
|
# compat_shlex.split
|
||||||
try:
|
try:
|
||||||
args = shlex.split('中文')
|
args = shlex.split('中文')
|
||||||
assert (isinstance(args, list)
|
assert (isinstance(args, list)
|
||||||
|
@ -2969,6 +3071,7 @@ except (AssertionError, UnicodeEncodeError):
|
||||||
return list(map(lambda s: s.decode('utf-8'), shlex.split(s, comments, posix)))
|
return list(map(lambda s: s.decode('utf-8'), shlex.split(s, comments, posix)))
|
||||||
|
|
||||||
|
|
||||||
|
# compat_ord
|
||||||
def compat_ord(c):
|
def compat_ord(c):
|
||||||
if isinstance(c, int):
|
if isinstance(c, int):
|
||||||
return c
|
return c
|
||||||
|
@ -2976,6 +3079,7 @@ def compat_ord(c):
|
||||||
return ord(c)
|
return ord(c)
|
||||||
|
|
||||||
|
|
||||||
|
# compat_getenv, compat_os_path_expanduser, compat_setenv
|
||||||
if sys.version_info >= (3, 0):
|
if sys.version_info >= (3, 0):
|
||||||
compat_getenv = os.getenv
|
compat_getenv = os.getenv
|
||||||
compat_expanduser = os.path.expanduser
|
compat_expanduser = os.path.expanduser
|
||||||
|
@ -3063,6 +3167,22 @@ else:
|
||||||
compat_os_path_expanduser = compat_expanduser
|
compat_os_path_expanduser = compat_expanduser
|
||||||
|
|
||||||
|
|
||||||
|
# compat_os_makedirs
|
||||||
|
try:
|
||||||
|
os.makedirs('.', exist_ok=True)
|
||||||
|
compat_os_makedirs = os.makedirs
|
||||||
|
except TypeError: # < Py3.2
|
||||||
|
from errno import EEXIST as _errno_EEXIST
|
||||||
|
|
||||||
|
def compat_os_makedirs(name, mode=0o777, exist_ok=False):
|
||||||
|
try:
|
||||||
|
return os.makedirs(name, mode=mode)
|
||||||
|
except OSError as ose:
|
||||||
|
if not (exist_ok and ose.errno == _errno_EEXIST):
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
# compat_os_path_realpath
|
||||||
if compat_os_name == 'nt' and sys.version_info < (3, 8):
|
if compat_os_name == 'nt' and sys.version_info < (3, 8):
|
||||||
# os.path.realpath on Windows does not follow symbolic links
|
# os.path.realpath on Windows does not follow symbolic links
|
||||||
# prior to Python 3.8 (see https://bugs.python.org/issue9949)
|
# prior to Python 3.8 (see https://bugs.python.org/issue9949)
|
||||||
|
@ -3076,6 +3196,7 @@ else:
|
||||||
compat_os_path_realpath = compat_realpath
|
compat_os_path_realpath = compat_realpath
|
||||||
|
|
||||||
|
|
||||||
|
# compat_print
|
||||||
if sys.version_info < (3, 0):
|
if sys.version_info < (3, 0):
|
||||||
def compat_print(s):
|
def compat_print(s):
|
||||||
from .utils import preferredencoding
|
from .utils import preferredencoding
|
||||||
|
@ -3086,6 +3207,7 @@ else:
|
||||||
print(s)
|
print(s)
|
||||||
|
|
||||||
|
|
||||||
|
# compat_getpass_getpass
|
||||||
if sys.version_info < (3, 0) and sys.platform == 'win32':
|
if sys.version_info < (3, 0) and sys.platform == 'win32':
|
||||||
def compat_getpass(prompt, *args, **kwargs):
|
def compat_getpass(prompt, *args, **kwargs):
|
||||||
if isinstance(prompt, compat_str):
|
if isinstance(prompt, compat_str):
|
||||||
|
@ -3098,22 +3220,22 @@ else:
|
||||||
compat_getpass_getpass = compat_getpass
|
compat_getpass_getpass = compat_getpass
|
||||||
|
|
||||||
|
|
||||||
|
# compat_input
|
||||||
try:
|
try:
|
||||||
compat_input = raw_input
|
compat_input = raw_input
|
||||||
except NameError: # Python 3
|
except NameError: # Python 3
|
||||||
compat_input = input
|
compat_input = input
|
||||||
|
|
||||||
|
|
||||||
|
# compat_kwargs
|
||||||
# Python < 2.6.5 require kwargs to be bytes
|
# Python < 2.6.5 require kwargs to be bytes
|
||||||
try:
|
try:
|
||||||
def _testfunc(x):
|
(lambda x: x)(**{'x': 0})
|
||||||
pass
|
|
||||||
_testfunc(**{'x': 0})
|
|
||||||
except TypeError:
|
except TypeError:
|
||||||
def compat_kwargs(kwargs):
|
def compat_kwargs(kwargs):
|
||||||
return dict((bytes(k), v) for k, v in kwargs.items())
|
return dict((bytes(k), v) for k, v in kwargs.items())
|
||||||
else:
|
else:
|
||||||
compat_kwargs = lambda kwargs: kwargs
|
compat_kwargs = _IDENTITY
|
||||||
|
|
||||||
|
|
||||||
# compat_numeric_types
|
# compat_numeric_types
|
||||||
|
@ -3132,6 +3254,8 @@ except NameError: # Python 3
|
||||||
# compat_int
|
# compat_int
|
||||||
compat_int = compat_integer_types[-1]
|
compat_int = compat_integer_types[-1]
|
||||||
|
|
||||||
|
|
||||||
|
# compat_socket_create_connection
|
||||||
if sys.version_info < (2, 7):
|
if sys.version_info < (2, 7):
|
||||||
def compat_socket_create_connection(address, timeout, source_address=None):
|
def compat_socket_create_connection(address, timeout, source_address=None):
|
||||||
host, port = address
|
host, port = address
|
||||||
|
@ -3158,6 +3282,7 @@ else:
|
||||||
compat_socket_create_connection = socket.create_connection
|
compat_socket_create_connection = socket.create_connection
|
||||||
|
|
||||||
|
|
||||||
|
# compat_contextlib_suppress
|
||||||
try:
|
try:
|
||||||
from contextlib import suppress as compat_contextlib_suppress
|
from contextlib import suppress as compat_contextlib_suppress
|
||||||
except ImportError:
|
except ImportError:
|
||||||
|
@ -3200,12 +3325,12 @@ except AttributeError:
|
||||||
# repeated .close() is OK, but just in case
|
# repeated .close() is OK, but just in case
|
||||||
with compat_contextlib_suppress(EnvironmentError):
|
with compat_contextlib_suppress(EnvironmentError):
|
||||||
f.close()
|
f.close()
|
||||||
popen.wait()
|
popen.wait()
|
||||||
|
|
||||||
|
|
||||||
# Fix https://github.com/ytdl-org/youtube-dl/issues/4223
|
# Fix https://github.com/ytdl-org/youtube-dl/issues/4223
|
||||||
# See http://bugs.python.org/issue9161 for what is broken
|
# See http://bugs.python.org/issue9161 for what is broken
|
||||||
def workaround_optparse_bug9161():
|
def _workaround_optparse_bug9161():
|
||||||
op = optparse.OptionParser()
|
op = optparse.OptionParser()
|
||||||
og = optparse.OptionGroup(op, 'foo')
|
og = optparse.OptionGroup(op, 'foo')
|
||||||
try:
|
try:
|
||||||
|
@ -3224,9 +3349,10 @@ def workaround_optparse_bug9161():
|
||||||
optparse.OptionGroup.add_option = _compat_add_option
|
optparse.OptionGroup.add_option = _compat_add_option
|
||||||
|
|
||||||
|
|
||||||
if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3
|
# compat_shutil_get_terminal_size
|
||||||
compat_get_terminal_size = shutil.get_terminal_size
|
try:
|
||||||
else:
|
from shutil import get_terminal_size as compat_get_terminal_size # Python >= 3.3
|
||||||
|
except ImportError:
|
||||||
_terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
|
_terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
|
||||||
|
|
||||||
def compat_get_terminal_size(fallback=(80, 24)):
|
def compat_get_terminal_size(fallback=(80, 24)):
|
||||||
|
@ -3256,27 +3382,33 @@ else:
|
||||||
columns = _columns
|
columns = _columns
|
||||||
if lines is None or lines <= 0:
|
if lines is None or lines <= 0:
|
||||||
lines = _lines
|
lines = _lines
|
||||||
|
|
||||||
return _terminal_size(columns, lines)
|
return _terminal_size(columns, lines)
|
||||||
|
|
||||||
|
compat_shutil_get_terminal_size = compat_get_terminal_size
|
||||||
|
|
||||||
|
|
||||||
|
# compat_itertools_count
|
||||||
try:
|
try:
|
||||||
itertools.count(start=0, step=1)
|
type(itertools.count(start=0, step=1))
|
||||||
compat_itertools_count = itertools.count
|
compat_itertools_count = itertools.count
|
||||||
except TypeError: # Python 2.6
|
except TypeError: # Python 2.6 lacks step
|
||||||
def compat_itertools_count(start=0, step=1):
|
def compat_itertools_count(start=0, step=1):
|
||||||
while True:
|
while True:
|
||||||
yield start
|
yield start
|
||||||
start += step
|
start += step
|
||||||
|
|
||||||
|
|
||||||
|
# compat_tokenize_tokenize
|
||||||
if sys.version_info >= (3, 0):
|
if sys.version_info >= (3, 0):
|
||||||
from tokenize import tokenize as compat_tokenize_tokenize
|
from tokenize import tokenize as compat_tokenize_tokenize
|
||||||
else:
|
else:
|
||||||
from tokenize import generate_tokens as compat_tokenize_tokenize
|
from tokenize import generate_tokens as compat_tokenize_tokenize
|
||||||
|
|
||||||
|
|
||||||
|
# compat_struct_pack, compat_struct_unpack, compat_Struct
|
||||||
try:
|
try:
|
||||||
struct.pack('!I', 0)
|
type(struct.pack('!I', 0))
|
||||||
except TypeError:
|
except TypeError:
|
||||||
# In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
|
# In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
|
||||||
# See https://bugs.python.org/issue19099
|
# See https://bugs.python.org/issue19099
|
||||||
|
@ -3308,8 +3440,10 @@ else:
|
||||||
compat_Struct = struct.Struct
|
compat_Struct = struct.Struct
|
||||||
|
|
||||||
|
|
||||||
# compat_map/filter() returning an iterator, supposedly the
|
# builtins returning an iterator
|
||||||
# same versioning as for zip below
|
|
||||||
|
# compat_map, compat_filter
|
||||||
|
# supposedly the same versioning as for zip below
|
||||||
try:
|
try:
|
||||||
from future_builtins import map as compat_map
|
from future_builtins import map as compat_map
|
||||||
except ImportError:
|
except ImportError:
|
||||||
|
@ -3326,6 +3460,7 @@ except ImportError:
|
||||||
except ImportError:
|
except ImportError:
|
||||||
compat_filter = filter
|
compat_filter = filter
|
||||||
|
|
||||||
|
# compat_zip
|
||||||
try:
|
try:
|
||||||
from future_builtins import zip as compat_zip
|
from future_builtins import zip as compat_zip
|
||||||
except ImportError: # not 2.6+ or is 3.x
|
except ImportError: # not 2.6+ or is 3.x
|
||||||
|
@ -3335,6 +3470,7 @@ except ImportError: # not 2.6+ or is 3.x
|
||||||
compat_zip = zip
|
compat_zip = zip
|
||||||
|
|
||||||
|
|
||||||
|
# compat_itertools_zip_longest
|
||||||
# method renamed between Py2/3
|
# method renamed between Py2/3
|
||||||
try:
|
try:
|
||||||
from itertools import zip_longest as compat_itertools_zip_longest
|
from itertools import zip_longest as compat_itertools_zip_longest
|
||||||
|
@ -3342,7 +3478,8 @@ except ImportError:
|
||||||
from itertools import izip_longest as compat_itertools_zip_longest
|
from itertools import izip_longest as compat_itertools_zip_longest
|
||||||
|
|
||||||
|
|
||||||
# new class in collections
|
# compat_collections_chain_map
|
||||||
|
# collections.ChainMap: new class
|
||||||
try:
|
try:
|
||||||
from collections import ChainMap as compat_collections_chain_map
|
from collections import ChainMap as compat_collections_chain_map
|
||||||
# Py3.3's ChainMap is deficient
|
# Py3.3's ChainMap is deficient
|
||||||
|
@ -3398,19 +3535,22 @@ except ImportError:
|
||||||
def new_child(self, m=None, **kwargs):
|
def new_child(self, m=None, **kwargs):
|
||||||
m = m or {}
|
m = m or {}
|
||||||
m.update(kwargs)
|
m.update(kwargs)
|
||||||
return compat_collections_chain_map(m, *self.maps)
|
# support inheritance !
|
||||||
|
return type(self)(m, *self.maps)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def parents(self):
|
def parents(self):
|
||||||
return compat_collections_chain_map(*(self.maps[1:]))
|
return type(self)(*(self.maps[1:]))
|
||||||
|
|
||||||
|
|
||||||
|
# compat_re_Pattern, compat_re_Match
|
||||||
# Pythons disagree on the type of a pattern (RegexObject, _sre.SRE_Pattern, Pattern, ...?)
|
# Pythons disagree on the type of a pattern (RegexObject, _sre.SRE_Pattern, Pattern, ...?)
|
||||||
compat_re_Pattern = type(re.compile(''))
|
compat_re_Pattern = type(re.compile(''))
|
||||||
# and on the type of a match
|
# and on the type of a match
|
||||||
compat_re_Match = type(re.match('a', 'a'))
|
compat_re_Match = type(re.match('a', 'a'))
|
||||||
|
|
||||||
|
|
||||||
|
# compat_base64_b64decode
|
||||||
if sys.version_info < (3, 3):
|
if sys.version_info < (3, 3):
|
||||||
def compat_b64decode(s, *args, **kwargs):
|
def compat_b64decode(s, *args, **kwargs):
|
||||||
if isinstance(s, compat_str):
|
if isinstance(s, compat_str):
|
||||||
|
@ -3422,6 +3562,7 @@ else:
|
||||||
compat_base64_b64decode = compat_b64decode
|
compat_base64_b64decode = compat_b64decode
|
||||||
|
|
||||||
|
|
||||||
|
# compat_ctypes_WINFUNCTYPE
|
||||||
if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
|
if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
|
||||||
# PyPy2 prior to version 5.4.0 expects byte strings as Windows function
|
# PyPy2 prior to version 5.4.0 expects byte strings as Windows function
|
||||||
# names, see the original PyPy issue [1] and the youtube-dl one [2].
|
# names, see the original PyPy issue [1] and the youtube-dl one [2].
|
||||||
|
@ -3440,6 +3581,7 @@ else:
|
||||||
return ctypes.WINFUNCTYPE(*args, **kwargs)
|
return ctypes.WINFUNCTYPE(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
# compat_open
|
||||||
if sys.version_info < (3, 0):
|
if sys.version_info < (3, 0):
|
||||||
# open(file, mode='r', buffering=- 1, encoding=None, errors=None, newline=None, closefd=True) not: opener=None
|
# open(file, mode='r', buffering=- 1, encoding=None, errors=None, newline=None, closefd=True) not: opener=None
|
||||||
def compat_open(file_, *args, **kwargs):
|
def compat_open(file_, *args, **kwargs):
|
||||||
|
@ -3467,18 +3609,28 @@ except AttributeError:
|
||||||
def compat_datetime_timedelta_total_seconds(td):
|
def compat_datetime_timedelta_total_seconds(td):
|
||||||
return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6
|
return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6
|
||||||
|
|
||||||
|
|
||||||
# optional decompression packages
|
# optional decompression packages
|
||||||
|
# compat_brotli
|
||||||
# PyPi brotli package implements 'br' Content-Encoding
|
# PyPi brotli package implements 'br' Content-Encoding
|
||||||
try:
|
try:
|
||||||
import brotli as compat_brotli
|
import brotli as compat_brotli
|
||||||
except ImportError:
|
except ImportError:
|
||||||
compat_brotli = None
|
compat_brotli = None
|
||||||
|
# compat_ncompress
|
||||||
# PyPi ncompress package implements 'compress' Content-Encoding
|
# PyPi ncompress package implements 'compress' Content-Encoding
|
||||||
try:
|
try:
|
||||||
import ncompress as compat_ncompress
|
import ncompress as compat_ncompress
|
||||||
except ImportError:
|
except ImportError:
|
||||||
compat_ncompress = None
|
compat_ncompress = None
|
||||||
|
|
||||||
|
# compat_zstandard
|
||||||
|
# PyPi zstandard package implements 'zstd' Content-Encoding (RFC 8878 7.2)
|
||||||
|
try:
|
||||||
|
import zstandard as compat_zstandard
|
||||||
|
except ImportError:
|
||||||
|
compat_zstandard = None
|
||||||
|
|
||||||
|
|
||||||
legacy = [
|
legacy = [
|
||||||
'compat_HTMLParseError',
|
'compat_HTMLParseError',
|
||||||
|
@ -3495,6 +3647,7 @@ legacy = [
|
||||||
'compat_getpass',
|
'compat_getpass',
|
||||||
'compat_parse_qs',
|
'compat_parse_qs',
|
||||||
'compat_realpath',
|
'compat_realpath',
|
||||||
|
'compat_shlex_split',
|
||||||
'compat_urllib_parse_parse_qs',
|
'compat_urllib_parse_parse_qs',
|
||||||
'compat_urllib_parse_unquote',
|
'compat_urllib_parse_unquote',
|
||||||
'compat_urllib_parse_unquote_plus',
|
'compat_urllib_parse_unquote_plus',
|
||||||
|
@ -3508,8 +3661,6 @@ legacy = [
|
||||||
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
'compat_html_parser_HTMLParseError',
|
|
||||||
'compat_html_parser_HTMLParser',
|
|
||||||
'compat_Struct',
|
'compat_Struct',
|
||||||
'compat_base64_b64decode',
|
'compat_base64_b64decode',
|
||||||
'compat_basestring',
|
'compat_basestring',
|
||||||
|
@ -3518,13 +3669,9 @@ __all__ = [
|
||||||
'compat_chr',
|
'compat_chr',
|
||||||
'compat_collections_abc',
|
'compat_collections_abc',
|
||||||
'compat_collections_chain_map',
|
'compat_collections_chain_map',
|
||||||
'compat_datetime_timedelta_total_seconds',
|
|
||||||
'compat_http_cookiejar',
|
|
||||||
'compat_http_cookiejar_Cookie',
|
|
||||||
'compat_http_cookies',
|
|
||||||
'compat_http_cookies_SimpleCookie',
|
|
||||||
'compat_contextlib_suppress',
|
'compat_contextlib_suppress',
|
||||||
'compat_ctypes_WINFUNCTYPE',
|
'compat_ctypes_WINFUNCTYPE',
|
||||||
|
'compat_datetime_timedelta_total_seconds',
|
||||||
'compat_etree_fromstring',
|
'compat_etree_fromstring',
|
||||||
'compat_etree_iterfind',
|
'compat_etree_iterfind',
|
||||||
'compat_filter',
|
'compat_filter',
|
||||||
|
@ -3533,6 +3680,12 @@ __all__ = [
|
||||||
'compat_getpass_getpass',
|
'compat_getpass_getpass',
|
||||||
'compat_html_entities',
|
'compat_html_entities',
|
||||||
'compat_html_entities_html5',
|
'compat_html_entities_html5',
|
||||||
|
'compat_html_parser_HTMLParseError',
|
||||||
|
'compat_html_parser_HTMLParser',
|
||||||
|
'compat_http_cookiejar',
|
||||||
|
'compat_http_cookiejar_Cookie',
|
||||||
|
'compat_http_cookies',
|
||||||
|
'compat_http_cookies_SimpleCookie',
|
||||||
'compat_http_client',
|
'compat_http_client',
|
||||||
'compat_http_server',
|
'compat_http_server',
|
||||||
'compat_input',
|
'compat_input',
|
||||||
|
@ -3546,6 +3699,7 @@ __all__ = [
|
||||||
'compat_numeric_types',
|
'compat_numeric_types',
|
||||||
'compat_open',
|
'compat_open',
|
||||||
'compat_ord',
|
'compat_ord',
|
||||||
|
'compat_os_makedirs',
|
||||||
'compat_os_name',
|
'compat_os_name',
|
||||||
'compat_os_path_expanduser',
|
'compat_os_path_expanduser',
|
||||||
'compat_os_path_realpath',
|
'compat_os_path_realpath',
|
||||||
|
@ -3555,7 +3709,7 @@ __all__ = [
|
||||||
'compat_register_utf8',
|
'compat_register_utf8',
|
||||||
'compat_setenv',
|
'compat_setenv',
|
||||||
'compat_shlex_quote',
|
'compat_shlex_quote',
|
||||||
'compat_shlex_split',
|
'compat_shutil_get_terminal_size',
|
||||||
'compat_socket_create_connection',
|
'compat_socket_create_connection',
|
||||||
'compat_str',
|
'compat_str',
|
||||||
'compat_struct_pack',
|
'compat_struct_pack',
|
||||||
|
@ -3575,5 +3729,5 @@ __all__ = [
|
||||||
'compat_xml_etree_register_namespace',
|
'compat_xml_etree_register_namespace',
|
||||||
'compat_xpath',
|
'compat_xpath',
|
||||||
'compat_zip',
|
'compat_zip',
|
||||||
'workaround_optparse_bug9161',
|
'compat_zstandard',
|
||||||
]
|
]
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import logging
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
|
@ -496,20 +497,31 @@ class FFmpegFD(ExternalFD):
|
||||||
# as a context manager (newer Python 3.x and compat)
|
# as a context manager (newer Python 3.x and compat)
|
||||||
# Fixes "Resource Warning" in test/test_downloader_external.py
|
# Fixes "Resource Warning" in test/test_downloader_external.py
|
||||||
# [1] https://devpress.csdn.net/python/62fde12d7e66823466192e48.html
|
# [1] https://devpress.csdn.net/python/62fde12d7e66823466192e48.html
|
||||||
with compat_subprocess_Popen(args, stdin=subprocess.PIPE, env=env) as proc:
|
_proc = compat_subprocess_Popen(
|
||||||
|
args,
|
||||||
|
stdin=subprocess.PIPE,
|
||||||
|
stdout=subprocess.PIPE,
|
||||||
|
stderr=subprocess.STDOUT,
|
||||||
|
text=True,
|
||||||
|
universal_newlines=True,
|
||||||
|
bufsize=1,
|
||||||
|
env=env,
|
||||||
|
)
|
||||||
|
ffmpeg_logger = logging.getLogger('ffmpeg')
|
||||||
|
with _proc as proc:
|
||||||
try:
|
try:
|
||||||
|
for line in iter(proc.stdout.readline, ''):
|
||||||
|
ffmpeg_logger.debug(line.strip())
|
||||||
|
|
||||||
|
proc.stdout.close()
|
||||||
retval = proc.wait()
|
retval = proc.wait()
|
||||||
except BaseException as e:
|
except BaseException as e:
|
||||||
# subprocess.run would send the SIGKILL signal to ffmpeg and the
|
if isinstance(e, KeyError) and (sys.platform != 'win32'):
|
||||||
# mp4 file couldn't be played, but if we ask ffmpeg to quit it
|
process_communicate_or_kill(proc, 'q')
|
||||||
# produces a file that is playable (this is mostly useful for live
|
|
||||||
# streams). Note that Windows is not affected and produces playable
|
|
||||||
# files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
|
|
||||||
if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32':
|
|
||||||
process_communicate_or_kill(proc, b'q')
|
|
||||||
else:
|
else:
|
||||||
proc.kill()
|
proc.kill()
|
||||||
raise
|
raise
|
||||||
|
|
||||||
return retval
|
return retval
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -32,7 +32,7 @@ class BokeCCBaseIE(InfoExtractor):
|
||||||
|
|
||||||
|
|
||||||
class BokeCCIE(BokeCCBaseIE):
|
class BokeCCIE(BokeCCBaseIE):
|
||||||
_IE_DESC = 'CC视频'
|
IE_DESC = 'CC视频'
|
||||||
_VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
|
_VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
|
|
@ -9,7 +9,7 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class CloudyIE(InfoExtractor):
|
class CloudyIE(InfoExtractor):
|
||||||
_IE_DESC = 'cloudy.ec'
|
IE_DESC = 'cloudy.ec'
|
||||||
_VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.cloudy.ec/v/af511e2527aac',
|
'url': 'https://www.cloudy.ec/v/af511e2527aac',
|
||||||
|
|
|
@ -422,6 +422,8 @@ class InfoExtractor(object):
|
||||||
_GEO_COUNTRIES = None
|
_GEO_COUNTRIES = None
|
||||||
_GEO_IP_BLOCKS = None
|
_GEO_IP_BLOCKS = None
|
||||||
_WORKING = True
|
_WORKING = True
|
||||||
|
# supply this in public subclasses: used in supported sites list, etc
|
||||||
|
# IE_DESC = 'short description of IE'
|
||||||
|
|
||||||
def __init__(self, downloader=None):
|
def __init__(self, downloader=None):
|
||||||
"""Constructor. Receives an optional downloader."""
|
"""Constructor. Receives an optional downloader."""
|
||||||
|
@ -503,7 +505,7 @@ class InfoExtractor(object):
|
||||||
if not self._x_forwarded_for_ip:
|
if not self._x_forwarded_for_ip:
|
||||||
|
|
||||||
# Geo bypass mechanism is explicitly disabled by user
|
# Geo bypass mechanism is explicitly disabled by user
|
||||||
if not self._downloader.params.get('geo_bypass', True):
|
if not self.get_param('geo_bypass', True):
|
||||||
return
|
return
|
||||||
|
|
||||||
if not geo_bypass_context:
|
if not geo_bypass_context:
|
||||||
|
@ -525,7 +527,7 @@ class InfoExtractor(object):
|
||||||
|
|
||||||
# Explicit IP block specified by user, use it right away
|
# Explicit IP block specified by user, use it right away
|
||||||
# regardless of whether extractor is geo bypassable or not
|
# regardless of whether extractor is geo bypassable or not
|
||||||
ip_block = self._downloader.params.get('geo_bypass_ip_block', None)
|
ip_block = self.get_param('geo_bypass_ip_block', None)
|
||||||
|
|
||||||
# Otherwise use random IP block from geo bypass context but only
|
# Otherwise use random IP block from geo bypass context but only
|
||||||
# if extractor is known as geo bypassable
|
# if extractor is known as geo bypassable
|
||||||
|
@ -536,8 +538,8 @@ class InfoExtractor(object):
|
||||||
|
|
||||||
if ip_block:
|
if ip_block:
|
||||||
self._x_forwarded_for_ip = GeoUtils.random_ipv4(ip_block)
|
self._x_forwarded_for_ip = GeoUtils.random_ipv4(ip_block)
|
||||||
if self._downloader.params.get('verbose', False):
|
if self.get_param('verbose', False):
|
||||||
self._downloader.to_screen(
|
self.to_screen(
|
||||||
'[debug] Using fake IP %s as X-Forwarded-For.'
|
'[debug] Using fake IP %s as X-Forwarded-For.'
|
||||||
% self._x_forwarded_for_ip)
|
% self._x_forwarded_for_ip)
|
||||||
return
|
return
|
||||||
|
@ -546,7 +548,7 @@ class InfoExtractor(object):
|
||||||
|
|
||||||
# Explicit country code specified by user, use it right away
|
# Explicit country code specified by user, use it right away
|
||||||
# regardless of whether extractor is geo bypassable or not
|
# regardless of whether extractor is geo bypassable or not
|
||||||
country = self._downloader.params.get('geo_bypass_country', None)
|
country = self.get_param('geo_bypass_country', None)
|
||||||
|
|
||||||
# Otherwise use random country code from geo bypass context but
|
# Otherwise use random country code from geo bypass context but
|
||||||
# only if extractor is known as geo bypassable
|
# only if extractor is known as geo bypassable
|
||||||
|
@ -557,8 +559,8 @@ class InfoExtractor(object):
|
||||||
|
|
||||||
if country:
|
if country:
|
||||||
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country)
|
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country)
|
||||||
if self._downloader.params.get('verbose', False):
|
if self.get_param('verbose', False):
|
||||||
self._downloader.to_screen(
|
self.to_screen(
|
||||||
'[debug] Using fake IP %s (%s) as X-Forwarded-For.'
|
'[debug] Using fake IP %s (%s) as X-Forwarded-For.'
|
||||||
% (self._x_forwarded_for_ip, country.upper()))
|
% (self._x_forwarded_for_ip, country.upper()))
|
||||||
|
|
||||||
|
@ -584,9 +586,9 @@ class InfoExtractor(object):
|
||||||
raise ExtractorError('An extractor error has occurred.', cause=e)
|
raise ExtractorError('An extractor error has occurred.', cause=e)
|
||||||
|
|
||||||
def __maybe_fake_ip_and_retry(self, countries):
|
def __maybe_fake_ip_and_retry(self, countries):
|
||||||
if (not self._downloader.params.get('geo_bypass_country', None)
|
if (not self.get_param('geo_bypass_country', None)
|
||||||
and self._GEO_BYPASS
|
and self._GEO_BYPASS
|
||||||
and self._downloader.params.get('geo_bypass', True)
|
and self.get_param('geo_bypass', True)
|
||||||
and not self._x_forwarded_for_ip
|
and not self._x_forwarded_for_ip
|
||||||
and countries):
|
and countries):
|
||||||
country_code = random.choice(countries)
|
country_code = random.choice(countries)
|
||||||
|
@ -696,7 +698,7 @@ class InfoExtractor(object):
|
||||||
if fatal:
|
if fatal:
|
||||||
raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
|
raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
|
||||||
else:
|
else:
|
||||||
self._downloader.report_warning(errmsg)
|
self.report_warning(errmsg)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
|
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
|
||||||
|
@ -768,11 +770,11 @@ class InfoExtractor(object):
|
||||||
webpage_bytes = prefix + webpage_bytes
|
webpage_bytes = prefix + webpage_bytes
|
||||||
if not encoding:
|
if not encoding:
|
||||||
encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
|
encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
|
||||||
if self._downloader.params.get('dump_intermediate_pages', False):
|
if self.get_param('dump_intermediate_pages', False):
|
||||||
self.to_screen('Dumping request to ' + urlh.geturl())
|
self.to_screen('Dumping request to ' + urlh.geturl())
|
||||||
dump = base64.b64encode(webpage_bytes).decode('ascii')
|
dump = base64.b64encode(webpage_bytes).decode('ascii')
|
||||||
self._downloader.to_screen(dump)
|
self.to_screen(dump)
|
||||||
if self._downloader.params.get('write_pages', False):
|
if self.get_param('write_pages', False):
|
||||||
basen = '%s_%s' % (video_id, urlh.geturl())
|
basen = '%s_%s' % (video_id, urlh.geturl())
|
||||||
if len(basen) > 240:
|
if len(basen) > 240:
|
||||||
h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
|
h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
|
||||||
|
@ -974,19 +976,9 @@ class InfoExtractor(object):
|
||||||
"""Print msg to screen, prefixing it with '[ie_name]'"""
|
"""Print msg to screen, prefixing it with '[ie_name]'"""
|
||||||
self._downloader.to_screen(self.__ie_msg(msg))
|
self._downloader.to_screen(self.__ie_msg(msg))
|
||||||
|
|
||||||
def write_debug(self, msg, only_once=False, _cache=[]):
|
def write_debug(self, msg, only_once=False):
|
||||||
'''Log debug message or Print message to stderr'''
|
'''Log debug message or Print message to stderr'''
|
||||||
if not self.get_param('verbose', False):
|
self._downloader.write_debug(self.__ie_msg(msg), only_once=only_once)
|
||||||
return
|
|
||||||
message = '[debug] ' + self.__ie_msg(msg)
|
|
||||||
logger = self.get_param('logger')
|
|
||||||
if logger:
|
|
||||||
logger.debug(message)
|
|
||||||
else:
|
|
||||||
if only_once and hash(message) in _cache:
|
|
||||||
return
|
|
||||||
self._downloader.to_stderr(message)
|
|
||||||
_cache.append(hash(message))
|
|
||||||
|
|
||||||
# name, default=None, *args, **kwargs
|
# name, default=None, *args, **kwargs
|
||||||
def get_param(self, name, *args, **kwargs):
|
def get_param(self, name, *args, **kwargs):
|
||||||
|
@ -1082,7 +1074,7 @@ class InfoExtractor(object):
|
||||||
if mobj:
|
if mobj:
|
||||||
break
|
break
|
||||||
|
|
||||||
if not self._downloader.params.get('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
|
if not self.get_param('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
|
||||||
_name = '\033[0;34m%s\033[0m' % name
|
_name = '\033[0;34m%s\033[0m' % name
|
||||||
else:
|
else:
|
||||||
_name = name
|
_name = name
|
||||||
|
@ -1100,7 +1092,7 @@ class InfoExtractor(object):
|
||||||
elif fatal:
|
elif fatal:
|
||||||
raise RegexNotFoundError('Unable to extract %s' % _name)
|
raise RegexNotFoundError('Unable to extract %s' % _name)
|
||||||
else:
|
else:
|
||||||
self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
|
self.report_warning('unable to extract %s' % _name + bug_reports_message())
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _search_json(self, start_pattern, string, name, video_id, **kwargs):
|
def _search_json(self, start_pattern, string, name, video_id, **kwargs):
|
||||||
|
@ -1170,7 +1162,7 @@ class InfoExtractor(object):
|
||||||
username = None
|
username = None
|
||||||
password = None
|
password = None
|
||||||
|
|
||||||
if self._downloader.params.get('usenetrc', False):
|
if self.get_param('usenetrc', False):
|
||||||
try:
|
try:
|
||||||
netrc_machine = netrc_machine or self._NETRC_MACHINE
|
netrc_machine = netrc_machine or self._NETRC_MACHINE
|
||||||
info = netrc.netrc().authenticators(netrc_machine)
|
info = netrc.netrc().authenticators(netrc_machine)
|
||||||
|
@ -1181,7 +1173,7 @@ class InfoExtractor(object):
|
||||||
raise netrc.NetrcParseError(
|
raise netrc.NetrcParseError(
|
||||||
'No authenticators for %s' % netrc_machine)
|
'No authenticators for %s' % netrc_machine)
|
||||||
except (AttributeError, IOError, netrc.NetrcParseError) as err:
|
except (AttributeError, IOError, netrc.NetrcParseError) as err:
|
||||||
self._downloader.report_warning(
|
self.report_warning(
|
||||||
'parsing .netrc: %s' % error_to_compat_str(err))
|
'parsing .netrc: %s' % error_to_compat_str(err))
|
||||||
|
|
||||||
return username, password
|
return username, password
|
||||||
|
@ -1218,10 +1210,10 @@ class InfoExtractor(object):
|
||||||
"""
|
"""
|
||||||
if self._downloader is None:
|
if self._downloader is None:
|
||||||
return None
|
return None
|
||||||
downloader_params = self._downloader.params
|
|
||||||
|
|
||||||
if downloader_params.get('twofactor') is not None:
|
twofactor = self.get_param('twofactor')
|
||||||
return downloader_params['twofactor']
|
if twofactor is not None:
|
||||||
|
return twofactor
|
||||||
|
|
||||||
return compat_getpass('Type %s and press [Return]: ' % note)
|
return compat_getpass('Type %s and press [Return]: ' % note)
|
||||||
|
|
||||||
|
@ -1356,7 +1348,7 @@ class InfoExtractor(object):
|
||||||
elif fatal:
|
elif fatal:
|
||||||
raise RegexNotFoundError('Unable to extract JSON-LD')
|
raise RegexNotFoundError('Unable to extract JSON-LD')
|
||||||
else:
|
else:
|
||||||
self._downloader.report_warning('unable to extract JSON-LD %s' % bug_reports_message())
|
self.report_warning('unable to extract JSON-LD %s' % bug_reports_message())
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
|
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
|
||||||
|
@ -1587,7 +1579,7 @@ class InfoExtractor(object):
|
||||||
|
|
||||||
if f.get('vcodec') == 'none': # audio only
|
if f.get('vcodec') == 'none': # audio only
|
||||||
preference -= 50
|
preference -= 50
|
||||||
if self._downloader.params.get('prefer_free_formats'):
|
if self.get_param('prefer_free_formats'):
|
||||||
ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus']
|
ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus']
|
||||||
else:
|
else:
|
||||||
ORDER = ['webm', 'opus', 'ogg', 'mp3', 'aac', 'm4a']
|
ORDER = ['webm', 'opus', 'ogg', 'mp3', 'aac', 'm4a']
|
||||||
|
@ -1599,7 +1591,7 @@ class InfoExtractor(object):
|
||||||
else:
|
else:
|
||||||
if f.get('acodec') == 'none': # video only
|
if f.get('acodec') == 'none': # video only
|
||||||
preference -= 40
|
preference -= 40
|
||||||
if self._downloader.params.get('prefer_free_formats'):
|
if self.get_param('prefer_free_formats'):
|
||||||
ORDER = ['flv', 'mp4', 'webm']
|
ORDER = ['flv', 'mp4', 'webm']
|
||||||
else:
|
else:
|
||||||
ORDER = ['webm', 'flv', 'mp4']
|
ORDER = ['webm', 'flv', 'mp4']
|
||||||
|
@ -1665,7 +1657,7 @@ class InfoExtractor(object):
|
||||||
""" Either "http:" or "https:", depending on the user's preferences """
|
""" Either "http:" or "https:", depending on the user's preferences """
|
||||||
return (
|
return (
|
||||||
'http:'
|
'http:'
|
||||||
if self._downloader.params.get('prefer_insecure', False)
|
if self.get_param('prefer_insecure', False)
|
||||||
else 'https:')
|
else 'https:')
|
||||||
|
|
||||||
def _proto_relative_url(self, url, scheme=None):
|
def _proto_relative_url(self, url, scheme=None):
|
||||||
|
@ -3197,7 +3189,7 @@ class InfoExtractor(object):
|
||||||
if fatal:
|
if fatal:
|
||||||
raise ExtractorError(msg)
|
raise ExtractorError(msg)
|
||||||
else:
|
else:
|
||||||
self._downloader.report_warning(msg)
|
self.report_warning(msg)
|
||||||
return res
|
return res
|
||||||
|
|
||||||
def _float(self, v, name, fatal=False, **kwargs):
|
def _float(self, v, name, fatal=False, **kwargs):
|
||||||
|
@ -3207,7 +3199,7 @@ class InfoExtractor(object):
|
||||||
if fatal:
|
if fatal:
|
||||||
raise ExtractorError(msg)
|
raise ExtractorError(msg)
|
||||||
else:
|
else:
|
||||||
self._downloader.report_warning(msg)
|
self.report_warning(msg)
|
||||||
return res
|
return res
|
||||||
|
|
||||||
def _set_cookie(self, domain, name, value, expire_time=None, port=None,
|
def _set_cookie(self, domain, name, value, expire_time=None, port=None,
|
||||||
|
@ -3216,12 +3208,12 @@ class InfoExtractor(object):
|
||||||
0, name, value, port, port is not None, domain, True,
|
0, name, value, port, port is not None, domain, True,
|
||||||
domain.startswith('.'), path, True, secure, expire_time,
|
domain.startswith('.'), path, True, secure, expire_time,
|
||||||
discard, None, None, rest)
|
discard, None, None, rest)
|
||||||
self._downloader.cookiejar.set_cookie(cookie)
|
self.cookiejar.set_cookie(cookie)
|
||||||
|
|
||||||
def _get_cookies(self, url):
|
def _get_cookies(self, url):
|
||||||
""" Return a compat_cookies_SimpleCookie with the cookies for the url """
|
""" Return a compat_cookies_SimpleCookie with the cookies for the url """
|
||||||
req = sanitized_Request(url)
|
req = sanitized_Request(url)
|
||||||
self._downloader.cookiejar.add_cookie_header(req)
|
self.cookiejar.add_cookie_header(req)
|
||||||
return compat_cookies_SimpleCookie(req.get_header('Cookie'))
|
return compat_cookies_SimpleCookie(req.get_header('Cookie'))
|
||||||
|
|
||||||
def _apply_first_set_cookie_header(self, url_handle, cookie):
|
def _apply_first_set_cookie_header(self, url_handle, cookie):
|
||||||
|
@ -3281,8 +3273,8 @@ class InfoExtractor(object):
|
||||||
return not any_restricted
|
return not any_restricted
|
||||||
|
|
||||||
def extract_subtitles(self, *args, **kwargs):
|
def extract_subtitles(self, *args, **kwargs):
|
||||||
if (self._downloader.params.get('writesubtitles', False)
|
if (self.get_param('writesubtitles', False)
|
||||||
or self._downloader.params.get('listsubtitles')):
|
or self.get_param('listsubtitles')):
|
||||||
return self._get_subtitles(*args, **kwargs)
|
return self._get_subtitles(*args, **kwargs)
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
@ -3303,7 +3295,11 @@ class InfoExtractor(object):
|
||||||
""" Merge subtitle dictionaries, language by language. """
|
""" Merge subtitle dictionaries, language by language. """
|
||||||
|
|
||||||
# ..., * , target=None
|
# ..., * , target=None
|
||||||
target = kwargs.get('target') or dict(subtitle_dict1)
|
target = kwargs.get('target')
|
||||||
|
if target is None:
|
||||||
|
target = dict(subtitle_dict1)
|
||||||
|
else:
|
||||||
|
subtitle_dicts = (subtitle_dict1,) + subtitle_dicts
|
||||||
|
|
||||||
for subtitle_dict in subtitle_dicts:
|
for subtitle_dict in subtitle_dicts:
|
||||||
for lang in subtitle_dict:
|
for lang in subtitle_dict:
|
||||||
|
@ -3311,8 +3307,8 @@ class InfoExtractor(object):
|
||||||
return target
|
return target
|
||||||
|
|
||||||
def extract_automatic_captions(self, *args, **kwargs):
|
def extract_automatic_captions(self, *args, **kwargs):
|
||||||
if (self._downloader.params.get('writeautomaticsub', False)
|
if (self.get_param('writeautomaticsub', False)
|
||||||
or self._downloader.params.get('listsubtitles')):
|
or self.get_param('listsubtitles')):
|
||||||
return self._get_automatic_captions(*args, **kwargs)
|
return self._get_automatic_captions(*args, **kwargs)
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
@ -3320,9 +3316,9 @@ class InfoExtractor(object):
|
||||||
raise NotImplementedError('This method must be implemented by subclasses')
|
raise NotImplementedError('This method must be implemented by subclasses')
|
||||||
|
|
||||||
def mark_watched(self, *args, **kwargs):
|
def mark_watched(self, *args, **kwargs):
|
||||||
if (self._downloader.params.get('mark_watched', False)
|
if (self.get_param('mark_watched', False)
|
||||||
and (self._get_login_info()[0] is not None
|
and (self._get_login_info()[0] is not None
|
||||||
or self._downloader.params.get('cookiefile') is not None)):
|
or self.get_param('cookiefile') is not None)):
|
||||||
self._mark_watched(*args, **kwargs)
|
self._mark_watched(*args, **kwargs)
|
||||||
|
|
||||||
def _mark_watched(self, *args, **kwargs):
|
def _mark_watched(self, *args, **kwargs):
|
||||||
|
@ -3330,7 +3326,7 @@ class InfoExtractor(object):
|
||||||
|
|
||||||
def geo_verification_headers(self):
|
def geo_verification_headers(self):
|
||||||
headers = {}
|
headers = {}
|
||||||
geo_verification_proxy = self._downloader.params.get('geo_verification_proxy')
|
geo_verification_proxy = self.get_param('geo_verification_proxy')
|
||||||
if geo_verification_proxy:
|
if geo_verification_proxy:
|
||||||
headers['Ytdl-request-proxy'] = geo_verification_proxy
|
headers['Ytdl-request-proxy'] = geo_verification_proxy
|
||||||
return headers
|
return headers
|
||||||
|
|
|
@ -35,15 +35,6 @@ from ..utils import (
|
||||||
|
|
||||||
class ITVBaseIE(InfoExtractor):
|
class ITVBaseIE(InfoExtractor):
|
||||||
|
|
||||||
def _search_nextjs_data(self, webpage, video_id, **kw):
|
|
||||||
transform_source = kw.pop('transform_source', None)
|
|
||||||
fatal = kw.pop('fatal', True)
|
|
||||||
return self._parse_json(
|
|
||||||
self._search_regex(
|
|
||||||
r'''<script\b[^>]+\bid=('|")__NEXT_DATA__\1[^>]*>(?P<js>[^<]+)</script>''',
|
|
||||||
webpage, 'next.js data', group='js', fatal=fatal, **kw),
|
|
||||||
video_id, transform_source=transform_source, fatal=fatal)
|
|
||||||
|
|
||||||
def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True):
|
def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True):
|
||||||
if errnote is False:
|
if errnote is False:
|
||||||
return False
|
return False
|
||||||
|
@ -109,7 +100,9 @@ class ITVBaseIE(InfoExtractor):
|
||||||
|
|
||||||
class ITVIE(ITVBaseIE):
|
class ITVIE(ITVBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)'
|
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)'
|
||||||
_IE_DESC = 'ITVX'
|
IE_DESC = 'ITVX'
|
||||||
|
_WORKING = False
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'note': 'Hub URLs redirect to ITVX',
|
'note': 'Hub URLs redirect to ITVX',
|
||||||
'url': 'https://www.itv.com/hub/liar/2a4547a0012',
|
'url': 'https://www.itv.com/hub/liar/2a4547a0012',
|
||||||
|
@ -270,7 +263,7 @@ class ITVIE(ITVBaseIE):
|
||||||
'ext': determine_ext(href, 'vtt'),
|
'ext': determine_ext(href, 'vtt'),
|
||||||
})
|
})
|
||||||
|
|
||||||
next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default='{}')
|
next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default={})
|
||||||
video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict)[0] or {})
|
video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict)[0] or {})
|
||||||
title = traverse_obj(video_data, 'headerTitle', 'episodeTitle')
|
title = traverse_obj(video_data, 'headerTitle', 'episodeTitle')
|
||||||
info = self._og_extract(webpage, require_title=not title)
|
info = self._og_extract(webpage, require_title=not title)
|
||||||
|
@ -323,7 +316,7 @@ class ITVIE(ITVBaseIE):
|
||||||
|
|
||||||
class ITVBTCCIE(ITVBaseIE):
|
class ITVBTCCIE(ITVBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)'
|
||||||
_IE_DESC = 'ITV articles: News, British Touring Car Championship'
|
IE_DESC = 'ITV articles: News, British Touring Car Championship'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'note': 'British Touring Car Championship',
|
'note': 'British Touring Car Championship',
|
||||||
'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch',
|
'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch',
|
||||||
|
|
|
@ -47,7 +47,7 @@ class SenateISVPIE(InfoExtractor):
|
||||||
['vetaff', '76462', 'http://vetaff-f.akamaihd.net'],
|
['vetaff', '76462', 'http://vetaff-f.akamaihd.net'],
|
||||||
['arch', '', 'http://ussenate-f.akamaihd.net/']
|
['arch', '', 'http://ussenate-f.akamaihd.net/']
|
||||||
]
|
]
|
||||||
_IE_NAME = 'senate.gov'
|
IE_NAME = 'senate.gov'
|
||||||
_VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)'
|
_VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
|
'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
|
||||||
|
|
|
@ -27,6 +27,7 @@ from ..compat import (
|
||||||
)
|
)
|
||||||
from ..jsinterp import JSInterpreter
|
from ..jsinterp import JSInterpreter
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
bug_reports_message,
|
||||||
clean_html,
|
clean_html,
|
||||||
dict_get,
|
dict_get,
|
||||||
error_to_compat_str,
|
error_to_compat_str,
|
||||||
|
@ -48,6 +49,7 @@ from ..utils import (
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_qs,
|
parse_qs,
|
||||||
qualities,
|
qualities,
|
||||||
|
remove_end,
|
||||||
remove_start,
|
remove_start,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
|
@ -65,6 +67,7 @@ from ..utils import (
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
urljoin,
|
urljoin,
|
||||||
|
variadic,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -89,12 +92,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
'INNERTUBE_CONTEXT': {
|
'INNERTUBE_CONTEXT': {
|
||||||
'client': {
|
'client': {
|
||||||
'clientName': 'IOS',
|
'clientName': 'IOS',
|
||||||
'clientVersion': '19.45.4',
|
'clientVersion': '20.10.4',
|
||||||
'deviceMake': 'Apple',
|
'deviceMake': 'Apple',
|
||||||
'deviceModel': 'iPhone16,2',
|
'deviceModel': 'iPhone16,2',
|
||||||
'userAgent': 'com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)',
|
'userAgent': 'com.google.ios.youtube/20.10.4 (iPhone16,2; U; CPU iOS 18_3_2 like Mac OS X;)',
|
||||||
'osName': 'iPhone',
|
'osName': 'iPhone',
|
||||||
'osVersion': '18.1.0.22B83',
|
'osVersion': '18.3.2.22D82',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
|
'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
|
||||||
|
@ -107,7 +110,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
'INNERTUBE_CONTEXT': {
|
'INNERTUBE_CONTEXT': {
|
||||||
'client': {
|
'client': {
|
||||||
'clientName': 'MWEB',
|
'clientName': 'MWEB',
|
||||||
'clientVersion': '2.20241202.07.00',
|
'clientVersion': '2.20250311.03.00',
|
||||||
# mweb previously did not require PO Token with this UA
|
# mweb previously did not require PO Token with this UA
|
||||||
'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)',
|
'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)',
|
||||||
},
|
},
|
||||||
|
@ -120,7 +123,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
'INNERTUBE_CONTEXT': {
|
'INNERTUBE_CONTEXT': {
|
||||||
'client': {
|
'client': {
|
||||||
'clientName': 'TVHTML5',
|
'clientName': 'TVHTML5',
|
||||||
'clientVersion': '7.20241201.18.00',
|
'clientVersion': '7.20250312.16.00',
|
||||||
|
'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 7,
|
'INNERTUBE_CONTEXT_CLIENT_NAME': 7,
|
||||||
|
@ -130,7 +134,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
'INNERTUBE_CONTEXT': {
|
'INNERTUBE_CONTEXT': {
|
||||||
'client': {
|
'client': {
|
||||||
'clientName': 'WEB',
|
'clientName': 'WEB',
|
||||||
'clientVersion': '2.20241126.01.00',
|
'clientVersion': '2.20250312.04.00',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
|
'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
|
||||||
|
@ -339,14 +343,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
if not self._login():
|
if not self._login():
|
||||||
return
|
return
|
||||||
|
|
||||||
_DEFAULT_API_DATA = {
|
_DEFAULT_API_DATA = {'context': _INNERTUBE_CLIENTS['web']['INNERTUBE_CONTEXT']}
|
||||||
'context': {
|
|
||||||
'client': {
|
|
||||||
'clientName': 'WEB',
|
|
||||||
'clientVersion': '2.20201021.03.00',
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
|
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
|
||||||
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
|
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
|
||||||
|
@ -460,6 +457,26 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_thumbnails(data, *path_list, **kw_final_key):
|
||||||
|
"""
|
||||||
|
Extract thumbnails from thumbnails dict
|
||||||
|
@param path_list: path list to level that contains 'thumbnails' key
|
||||||
|
"""
|
||||||
|
final_key = kw_final_key.get('final_key', 'thumbnails')
|
||||||
|
|
||||||
|
return traverse_obj(data, ((
|
||||||
|
tuple(variadic(path) + (final_key, Ellipsis)
|
||||||
|
for path in path_list or [()])), {
|
||||||
|
'url': ('url', T(url_or_none),
|
||||||
|
# Sometimes youtube gives a wrong thumbnail URL. See:
|
||||||
|
# https://github.com/yt-dlp/yt-dlp/issues/233
|
||||||
|
# https://github.com/ytdl-org/youtube-dl/issues/28023
|
||||||
|
T(lambda u: update_url(u, query=None) if u and 'maxresdefault' in u else u)),
|
||||||
|
'height': ('height', T(int_or_none)),
|
||||||
|
'width': ('width', T(int_or_none)),
|
||||||
|
}, T(lambda t: t if t.get('url') else None)))
|
||||||
|
|
||||||
def _search_results(self, query, params):
|
def _search_results(self, query, params):
|
||||||
data = {
|
data = {
|
||||||
'context': {
|
'context': {
|
||||||
|
@ -474,11 +491,15 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
data['params'] = params
|
data['params'] = params
|
||||||
for page_num in itertools.count(1):
|
for page_num in itertools.count(1):
|
||||||
search = self._download_json(
|
search = self._download_json(
|
||||||
'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
|
'https://www.youtube.com/youtubei/v1/search',
|
||||||
video_id='query "%s"' % query,
|
video_id='query "%s"' % query,
|
||||||
note='Downloading page %s' % page_num,
|
note='Downloading page %s' % page_num,
|
||||||
errnote='Unable to download API page', fatal=False,
|
errnote='Unable to download API page', fatal=False,
|
||||||
data=json.dumps(data).encode('utf8'),
|
data=json.dumps(data).encode('utf8'),
|
||||||
|
query={
|
||||||
|
# 'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
|
||||||
|
'prettyPrint': 'false',
|
||||||
|
},
|
||||||
headers={'content-type': 'application/json'})
|
headers={'content-type': 'application/json'})
|
||||||
if not search:
|
if not search:
|
||||||
break
|
break
|
||||||
|
@ -669,9 +690,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
'invidious': '|'.join(_INVIDIOUS_SITES),
|
'invidious': '|'.join(_INVIDIOUS_SITES),
|
||||||
}
|
}
|
||||||
_PLAYER_INFO_RE = (
|
_PLAYER_INFO_RE = (
|
||||||
r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
|
r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/(?:tv-)?player',
|
||||||
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
|
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias(?:_tce)?\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
|
||||||
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
|
r'\b(?P<id>vfl[a-zA-Z0-9_-]{6,})\b.*?\.js$',
|
||||||
)
|
)
|
||||||
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
|
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
|
||||||
|
|
||||||
|
@ -1564,6 +1585,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
'397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
|
'397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_PLAYER_JS_VARIANT_MAP = (
|
||||||
|
('main', 'player_ias.vflset/en_US/base.js'),
|
||||||
|
('tce', 'player_ias_tce.vflset/en_US/base.js'),
|
||||||
|
('tv', 'tv-player-ias.vflset/tv-player-ias.js'),
|
||||||
|
('tv_es6', 'tv-player-es6.vflset/tv-player-es6.js'),
|
||||||
|
('phone', 'player-plasma-ias-phone-en_US.vflset/base.js'),
|
||||||
|
('tablet', 'player-plasma-ias-tablet-en_US.vflset/base.js'),
|
||||||
|
)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
if parse_qs(url).get('list', [None])[0]:
|
if parse_qs(url).get('list', [None])[0]:
|
||||||
|
@ -1603,46 +1633,97 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
""" Return a string representation of a signature """
|
""" Return a string representation of a signature """
|
||||||
return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
|
return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
|
||||||
|
|
||||||
@classmethod
|
def _extract_player_info(self, player_url):
|
||||||
def _extract_player_info(cls, player_url):
|
try:
|
||||||
for player_re in cls._PLAYER_INFO_RE:
|
return self._search_regex(
|
||||||
id_m = re.search(player_re, player_url)
|
self._PLAYER_INFO_RE, player_url, 'player info', group='id')
|
||||||
if id_m:
|
except ExtractorError as e:
|
||||||
break
|
raise ExtractorError(
|
||||||
else:
|
'Cannot identify player %r' % (player_url,), cause=e)
|
||||||
raise ExtractorError('Cannot identify player %r' % player_url)
|
|
||||||
return id_m.group('id')
|
|
||||||
|
|
||||||
def _load_player(self, video_id, player_url, fatal=True, player_id=None):
|
def _player_js_cache_key(self, player_url, extra_id=None, _cache={}):
|
||||||
if not player_id:
|
if player_url not in _cache:
|
||||||
player_id = self._extract_player_info(player_url)
|
player_id = self._extract_player_info(player_url)
|
||||||
if player_id not in self._code_cache:
|
player_path = remove_start(
|
||||||
|
compat_urllib_parse.urlparse(player_url).path,
|
||||||
|
'/s/player/{0}/'.format(player_id))
|
||||||
|
variant = next((k for k, v in self._PLAYER_JS_VARIANT_MAP
|
||||||
|
if v == player_path), None)
|
||||||
|
if not variant:
|
||||||
|
variant = next(
|
||||||
|
(k for k, v in self._PLAYER_JS_VARIANT_MAP
|
||||||
|
if re.match(re.escape(v).replace('en_US', r'\w+') + '$', player_path)),
|
||||||
|
None)
|
||||||
|
if not variant:
|
||||||
|
self.write_debug(
|
||||||
|
'Unable to determine player JS variant\n'
|
||||||
|
' player = {0}'.format(player_url), only_once=True)
|
||||||
|
variant = re.sub(r'[^a-zA-Z0-9]', '_', remove_end(player_path, '.js'))
|
||||||
|
_cache[player_url] = join_nonempty(player_id, variant)
|
||||||
|
|
||||||
|
if extra_id:
|
||||||
|
extra_id = '-'.join((_cache[player_url], extra_id))
|
||||||
|
assert os.path.basename(extra_id) == extra_id
|
||||||
|
return extra_id
|
||||||
|
return _cache[player_url]
|
||||||
|
|
||||||
|
def _load_player(self, video_id, player_url, fatal=True):
|
||||||
|
player_js_key = self._player_js_cache_key(player_url)
|
||||||
|
if player_js_key not in self._code_cache:
|
||||||
code = self._download_webpage(
|
code = self._download_webpage(
|
||||||
player_url, video_id, fatal=fatal,
|
player_url, video_id, fatal=fatal,
|
||||||
note='Downloading player ' + player_id,
|
note='Downloading player {0}'.format(player_js_key),
|
||||||
errnote='Download of %s failed' % player_url)
|
errnote='Download of {0} failed'.format(player_url))
|
||||||
if code:
|
if code:
|
||||||
self._code_cache[player_id] = code
|
self._code_cache[player_js_key] = code
|
||||||
return self._code_cache[player_id] if fatal else self._code_cache.get(player_id)
|
return self._code_cache.get(player_js_key)
|
||||||
|
|
||||||
|
def _load_player_data_from_cache(self, name, player_url, extra_id=None):
|
||||||
|
cache_id = ('youtube-{0}'.format(name), self._player_js_cache_key(player_url, extra_id))
|
||||||
|
data = self._player_cache.get(cache_id)
|
||||||
|
if data:
|
||||||
|
return data
|
||||||
|
|
||||||
|
data = self.cache.load(*cache_id, min_ver='2025.04.07')
|
||||||
|
if data:
|
||||||
|
self._player_cache[cache_id] = data
|
||||||
|
return data
|
||||||
|
|
||||||
|
def _store_player_data_to_cache(self, name, player_url, data, extra_id=None):
|
||||||
|
cache_id = ('youtube-{0}'.format(name), self._player_js_cache_key(player_url, extra_id))
|
||||||
|
|
||||||
|
if cache_id not in self._player_cache:
|
||||||
|
self.cache.store(cache_id[0], cache_id[1], data)
|
||||||
|
self._player_cache[cache_id] = data
|
||||||
|
|
||||||
|
def _remove_player_data_from_cache(self, name, player_url, extra_id=None):
|
||||||
|
cache_id = ('youtube-{0}'.format(name), self._player_js_cache_key(player_url, extra_id))
|
||||||
|
|
||||||
|
if cache_id in self._player_cache:
|
||||||
|
self.cache.clear(*cache_id)
|
||||||
|
self._player_cache.pop(cache_id, None)
|
||||||
|
|
||||||
def _extract_signature_function(self, video_id, player_url, example_sig):
|
def _extract_signature_function(self, video_id, player_url, example_sig):
|
||||||
player_id = self._extract_player_info(player_url)
|
# player_id = self._extract_player_info(player_url)
|
||||||
|
|
||||||
# Read from filesystem cache
|
# Read from filesystem cache
|
||||||
func_id = 'js_{0}_{1}'.format(
|
extra_id = self._signature_cache_id(example_sig)
|
||||||
player_id, self._signature_cache_id(example_sig))
|
self.write_debug('Extracting signature function {0}-{1}'.format(player_url, extra_id))
|
||||||
assert os.path.basename(func_id) == func_id
|
cache_spec, code = self._load_player_data_from_cache(
|
||||||
|
'sigfuncs', player_url, extra_id=extra_id), None
|
||||||
self.write_debug('Extracting signature function {0}'.format(func_id))
|
|
||||||
cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
|
|
||||||
|
|
||||||
if not cache_spec:
|
if not cache_spec:
|
||||||
code = self._load_player(video_id, player_url, player_id)
|
code = self._load_player(video_id, player_url)
|
||||||
if code:
|
if code:
|
||||||
res = self._parse_sig_js(code)
|
res = self._parse_sig_js(code)
|
||||||
test_string = ''.join(map(compat_chr, range(len(example_sig))))
|
test_string = ''.join(map(compat_chr, range(len(example_sig))))
|
||||||
cache_spec = [ord(c) for c in res(test_string)]
|
cache_spec = [ord(c) for c in res(test_string)]
|
||||||
self.cache.store('youtube-sigfuncs', func_id, cache_spec)
|
self._store_player_data_to_cache(
|
||||||
|
'sigfuncs', player_url, cache_spec, extra_id=extra_id)
|
||||||
|
else:
|
||||||
|
self.report_warning(
|
||||||
|
'Failed to compute signature function {0}-{1}'.format(
|
||||||
|
player_url, extra_id))
|
||||||
|
|
||||||
return lambda s: ''.join(s[i] for i in cache_spec)
|
return lambda s: ''.join(s[i] for i in cache_spec)
|
||||||
|
|
||||||
|
@ -1688,6 +1769,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
' return %s\n') % (signature_id_tuple, expr_code)
|
' return %s\n') % (signature_id_tuple, expr_code)
|
||||||
self.to_screen('Extracted signature function:\n' + code)
|
self.to_screen('Extracted signature function:\n' + code)
|
||||||
|
|
||||||
|
def _extract_sig_fn(self, jsi, funcname):
|
||||||
|
var_ay = self._search_regex(
|
||||||
|
r'''(?x)
|
||||||
|
(?:\*/|\{|\n|^)\s*(?:'[^']+'\s*;\s*)
|
||||||
|
(var\s*[\w$]+\s*=\s*(?:
|
||||||
|
('|")(?:\\\2|(?!\2).)+\2\s*\.\s*split\(\s*('|")\W+\3\s*\)|
|
||||||
|
\[\s*(?:('|")(?:\\\4|(?!\4).)*\4\s*(?:(?=\])|,\s*))+\]
|
||||||
|
))(?=\s*[,;])
|
||||||
|
''', jsi.code, 'useful values', default='')
|
||||||
|
|
||||||
|
sig_fn = jsi.extract_function_code(funcname)
|
||||||
|
|
||||||
|
if var_ay:
|
||||||
|
sig_fn = (sig_fn[0], ';\n'.join((var_ay, sig_fn[1])))
|
||||||
|
|
||||||
|
return sig_fn
|
||||||
|
|
||||||
def _parse_sig_js(self, jscode):
|
def _parse_sig_js(self, jscode):
|
||||||
# Examples where `sig` is funcname:
|
# Examples where `sig` is funcname:
|
||||||
# sig=function(a){a=a.split(""); ... ;return a.join("")};
|
# sig=function(a){a=a.split(""); ... ;return a.join("")};
|
||||||
|
@ -1713,8 +1811,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
jscode, 'Initial JS player signature function name', group='sig')
|
jscode, 'Initial JS player signature function name', group='sig')
|
||||||
|
|
||||||
jsi = JSInterpreter(jscode)
|
jsi = JSInterpreter(jscode)
|
||||||
initial_function = jsi.extract_function(funcname)
|
|
||||||
return lambda s: initial_function([s])
|
initial_function = self._extract_sig_fn(jsi, funcname)
|
||||||
|
|
||||||
|
func = jsi.extract_function_from_code(*initial_function)
|
||||||
|
|
||||||
|
return lambda s: func([s])
|
||||||
|
|
||||||
def _cached(self, func, *cache_id):
|
def _cached(self, func, *cache_id):
|
||||||
def inner(*args, **kwargs):
|
def inner(*args, **kwargs):
|
||||||
|
@ -1774,6 +1876,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
def _extract_n_function_name(self, jscode):
|
def _extract_n_function_name(self, jscode):
|
||||||
|
func_name, idx = None, None
|
||||||
|
|
||||||
|
def generic_n_function_search(func_name=None):
|
||||||
|
return self._search_regex(
|
||||||
|
r'''(?xs)
|
||||||
|
(?:(?<=[^\w$])|^) # instead of \b, which ignores $
|
||||||
|
(?P<name>%s)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\)
|
||||||
|
\s*\{(?:(?!};).)+?(?:
|
||||||
|
["']enhanced_except_ |
|
||||||
|
return\s*(?P<q>"|')[a-zA-Z\d-]+_w8_(?P=q)\s*\+\s*[\w$]+
|
||||||
|
)
|
||||||
|
''' % (func_name or r'(?!\d)[a-zA-Z\d_$]+',), jscode,
|
||||||
|
'Initial JS player n function name', group='name',
|
||||||
|
default=None if func_name else NO_DEFAULT)
|
||||||
|
|
||||||
|
# these special cases are redundant and probably obsolete (2025-04):
|
||||||
|
# they make the tests run ~10% faster without fallback warnings
|
||||||
|
r"""
|
||||||
func_name, idx = self._search_regex(
|
func_name, idx = self._search_regex(
|
||||||
# (y=NuD(),Mw(k),q=k.Z[y]||null)&&(q=narray[idx](q),k.set(y,q),k.V||NuD(''))}};
|
# (y=NuD(),Mw(k),q=k.Z[y]||null)&&(q=narray[idx](q),k.set(y,q),k.V||NuD(''))}};
|
||||||
# (R="nn"[+J.Z],mW(J),N=J.K[R]||null)&&(N=narray[idx](N),J.set(R,N))}};
|
# (R="nn"[+J.Z],mW(J),N=J.K[R]||null)&&(N=narray[idx](N),J.set(R,N))}};
|
||||||
|
@ -1800,41 +1920,59 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
\(\s*[\w$]+\s*\)
|
\(\s*[\w$]+\s*\)
|
||||||
''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'),
|
''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'),
|
||||||
default=(None, None))
|
default=(None, None))
|
||||||
|
"""
|
||||||
|
|
||||||
|
if not func_name:
|
||||||
|
# nfunc=function(x){...}|function nfunc(x); ...
|
||||||
|
# ... var y=[nfunc]|y[idx]=nfunc);
|
||||||
|
# obvious REs hang, so use a two-stage tactic
|
||||||
|
for m in re.finditer(r'''(?x)
|
||||||
|
[\n;]var\s(?:(?:(?!,).)+,|\s)*?(?!\d)[\w$]+(?:\[(?P<idx>\d+)\])?\s*=\s*
|
||||||
|
(?(idx)|\[\s*)(?P<nfunc>(?!\d)[\w$]+)(?(idx)|\s*\])
|
||||||
|
\s*?[;\n]
|
||||||
|
''', jscode):
|
||||||
|
fn = self._search_regex(
|
||||||
|
r'[;,]\s*(function\s+)?({0})(?(1)|\s*=\s*function)\s*\((?!\d)[\w$]+\)\s*\{1}(?!\s*return\s)'.format(
|
||||||
|
re.escape(m.group('nfunc')), '{'),
|
||||||
|
jscode, 'Initial JS player n function name (2)', group=2, default=None)
|
||||||
|
if fn:
|
||||||
|
func_name = fn
|
||||||
|
idx = m.group('idx')
|
||||||
|
if generic_n_function_search(func_name):
|
||||||
|
# don't look any further
|
||||||
|
break
|
||||||
|
|
||||||
# thx bashonly: yt-dlp/yt-dlp/pull/10611
|
# thx bashonly: yt-dlp/yt-dlp/pull/10611
|
||||||
if not func_name:
|
if not func_name:
|
||||||
self.report_warning('Falling back to generic n function search')
|
self.report_warning('Falling back to generic n function search', only_once=True)
|
||||||
return self._search_regex(
|
return generic_n_function_search()
|
||||||
r'''(?xs)
|
|
||||||
(?:(?<=[^\w$])|^) # instead of \b, which ignores $
|
|
||||||
(?P<name>(?!\d)[a-zA-Z\d_$]+)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\)
|
|
||||||
\s*\{(?:(?!};).)+?(?:
|
|
||||||
["']enhanced_except_ |
|
|
||||||
return\s*(?P<q>"|')[a-zA-Z\d-]+_w8_(?P=q)\s*\+\s*[\w$]+
|
|
||||||
)
|
|
||||||
''', jscode, 'Initial JS player n function name', group='name')
|
|
||||||
if not idx:
|
if not idx:
|
||||||
return func_name
|
return func_name
|
||||||
|
|
||||||
return self._search_json(
|
return self._search_json(
|
||||||
r'var\s+{0}\s*='.format(re.escape(func_name)), jscode,
|
r'(?<![\w-])var\s(?:(?:(?!,).)+,|\s)*?{0}\s*='.format(re.escape(func_name)), jscode,
|
||||||
'Initial JS player n function list ({0}.{1})'.format(func_name, idx),
|
'Initial JS player n function list ({0}.{1})'.format(func_name, idx),
|
||||||
func_name, contains_pattern=r'\[[\s\S]+\]', end_pattern='[,;]',
|
func_name, contains_pattern=r'\[.+\]', end_pattern='[,;]',
|
||||||
transform_source=js_to_json)[int(idx)]
|
transform_source=js_to_json)[int(idx)]
|
||||||
|
|
||||||
def _extract_n_function_code(self, video_id, player_url):
|
def _extract_n_function_code(self, video_id, player_url):
|
||||||
player_id = self._extract_player_info(player_url)
|
player_id = self._extract_player_info(player_url)
|
||||||
func_code = self.cache.load('youtube-nsig', player_id)
|
func_code = self._load_player_data_from_cache('nsig', player_url)
|
||||||
jscode = func_code or self._load_player(video_id, player_url)
|
jscode = func_code or self._load_player(video_id, player_url)
|
||||||
jsi = JSInterpreter(jscode)
|
jsi = JSInterpreter(jscode)
|
||||||
|
|
||||||
if func_code:
|
if func_code:
|
||||||
return jsi, player_id, func_code
|
return jsi, player_id, func_code
|
||||||
|
|
||||||
func_name = self._extract_n_function_name(jscode)
|
return self._extract_n_function_code_jsi(video_id, jsi, player_id, player_url)
|
||||||
|
|
||||||
func_code = jsi.extract_function_code(func_name)
|
def _extract_n_function_code_jsi(self, video_id, jsi, player_id=None, player_url=None):
|
||||||
|
func_name = self._extract_n_function_name(jsi.code)
|
||||||
|
|
||||||
self.cache.store('youtube-nsig', player_id, func_code)
|
func_code = self._extract_sig_fn(jsi, func_name)
|
||||||
|
if player_url:
|
||||||
|
self._store_player_data_to_cache('nsig', player_url, func_code)
|
||||||
return jsi, player_id, func_code
|
return jsi, player_id, func_code
|
||||||
|
|
||||||
def _extract_n_function_from_code(self, jsi, func_code):
|
def _extract_n_function_from_code(self, jsi, func_code):
|
||||||
|
@ -1867,7 +2005,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
n_param = n_param[-1]
|
n_param = n_param[-1]
|
||||||
n_response = decrypt_nsig(n_param)(n_param, video_id, player_url)
|
n_response = decrypt_nsig(n_param)(n_param, video_id, player_url)
|
||||||
if n_response is None:
|
if n_response is None:
|
||||||
# give up if descrambling failed
|
# give up and forget cached data if descrambling failed
|
||||||
|
self._remove_player_data_from_cache('nsig', player_url)
|
||||||
break
|
break
|
||||||
fmt['url'] = update_url_query(fmt['url'], {'n': n_response})
|
fmt['url'] = update_url_query(fmt['url'], {'n': n_response})
|
||||||
|
|
||||||
|
@ -1878,18 +2017,28 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
Required to tell API what sig/player version is in use.
|
Required to tell API what sig/player version is in use.
|
||||||
"""
|
"""
|
||||||
sts = traverse_obj(ytcfg, 'STS', expected_type=int)
|
sts = traverse_obj(ytcfg, 'STS', expected_type=int)
|
||||||
if not sts:
|
if sts:
|
||||||
# Attempt to extract from player
|
return sts
|
||||||
if player_url is None:
|
|
||||||
error_msg = 'Cannot extract signature timestamp without player_url.'
|
if not player_url:
|
||||||
if fatal:
|
error_msg = 'Cannot extract signature timestamp without player url'
|
||||||
raise ExtractorError(error_msg)
|
if fatal:
|
||||||
self.report_warning(error_msg)
|
raise ExtractorError(error_msg)
|
||||||
return
|
self.report_warning(error_msg)
|
||||||
code = self._load_player(video_id, player_url, fatal=fatal)
|
return None
|
||||||
sts = int_or_none(self._search_regex(
|
|
||||||
r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code or '',
|
sts = self._load_player_data_from_cache('sts', player_url)
|
||||||
'JS player signature timestamp', group='sts', fatal=fatal))
|
if sts:
|
||||||
|
return sts
|
||||||
|
|
||||||
|
# Attempt to extract from player
|
||||||
|
code = self._load_player(video_id, player_url, fatal=fatal)
|
||||||
|
sts = int_or_none(self._search_regex(
|
||||||
|
r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code or '',
|
||||||
|
'JS player signature timestamp', group='sts', fatal=fatal))
|
||||||
|
if sts:
|
||||||
|
self._store_player_data_to_cache('sts', player_url, sts)
|
||||||
|
|
||||||
return sts
|
return sts
|
||||||
|
|
||||||
def _mark_watched(self, video_id, player_response):
|
def _mark_watched(self, video_id, player_response):
|
||||||
|
@ -2103,7 +2252,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
video_details = merge_dicts(*traverse_obj(
|
video_details = merge_dicts(*traverse_obj(
|
||||||
(player_response, api_player_response),
|
(player_response, api_player_response),
|
||||||
(Ellipsis, 'videoDetails', T(dict))))
|
(Ellipsis, 'videoDetails', T(dict))))
|
||||||
player_response.update(api_player_response or {})
|
player_response.update(filter_dict(
|
||||||
|
api_player_response or {}, cndn=lambda k, _: k != 'captions'))
|
||||||
player_response['videoDetails'] = video_details
|
player_response['videoDetails'] = video_details
|
||||||
|
|
||||||
def is_agegated(playability):
|
def is_agegated(playability):
|
||||||
|
@ -2533,8 +2683,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
}
|
}
|
||||||
|
|
||||||
pctr = traverse_obj(
|
pctr = traverse_obj(
|
||||||
player_response,
|
(player_response, api_player_response),
|
||||||
('captions', 'playerCaptionsTracklistRenderer', T(dict)))
|
(Ellipsis, 'captions', 'playerCaptionsTracklistRenderer', T(dict)))
|
||||||
if pctr:
|
if pctr:
|
||||||
def process_language(container, base_url, lang_code, query):
|
def process_language(container, base_url, lang_code, query):
|
||||||
lang_subs = []
|
lang_subs = []
|
||||||
|
@ -2551,20 +2701,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
def process_subtitles():
|
def process_subtitles():
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
for caption_track in traverse_obj(pctr, (
|
for caption_track in traverse_obj(pctr, (
|
||||||
'captionTracks', lambda _, v: v.get('baseUrl'))):
|
Ellipsis, 'captionTracks', lambda _, v: (
|
||||||
|
v.get('baseUrl') and v.get('languageCode')))):
|
||||||
base_url = self._yt_urljoin(caption_track['baseUrl'])
|
base_url = self._yt_urljoin(caption_track['baseUrl'])
|
||||||
if not base_url:
|
if not base_url:
|
||||||
continue
|
continue
|
||||||
|
lang_code = caption_track['languageCode']
|
||||||
if caption_track.get('kind') != 'asr':
|
if caption_track.get('kind') != 'asr':
|
||||||
lang_code = caption_track.get('languageCode')
|
|
||||||
if not lang_code:
|
|
||||||
continue
|
|
||||||
process_language(
|
process_language(
|
||||||
subtitles, base_url, lang_code, {})
|
subtitles, base_url, lang_code, {})
|
||||||
continue
|
continue
|
||||||
automatic_captions = {}
|
automatic_captions = {}
|
||||||
|
process_language(
|
||||||
|
automatic_captions, base_url, lang_code, {})
|
||||||
for translation_language in traverse_obj(pctr, (
|
for translation_language in traverse_obj(pctr, (
|
||||||
'translationLanguages', lambda _, v: v.get('languageCode'))):
|
Ellipsis, 'translationLanguages', lambda _, v: v.get('languageCode'))):
|
||||||
translation_language_code = translation_language['languageCode']
|
translation_language_code = translation_language['languageCode']
|
||||||
process_language(
|
process_language(
|
||||||
automatic_captions, base_url, translation_language_code,
|
automatic_captions, base_url, translation_language_code,
|
||||||
|
@ -3183,8 +3334,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||||
expected_type=txt_or_none)
|
expected_type=txt_or_none)
|
||||||
|
|
||||||
def _grid_entries(self, grid_renderer):
|
def _grid_entries(self, grid_renderer):
|
||||||
for item in grid_renderer['items']:
|
for item in traverse_obj(grid_renderer, ('items', Ellipsis, T(dict))):
|
||||||
if not isinstance(item, dict):
|
lockup_view_model = traverse_obj(item, ('lockupViewModel', T(dict)))
|
||||||
|
if lockup_view_model:
|
||||||
|
entry = self._extract_lockup_view_model(lockup_view_model)
|
||||||
|
if entry:
|
||||||
|
yield entry
|
||||||
continue
|
continue
|
||||||
renderer = self._extract_grid_item_renderer(item)
|
renderer = self._extract_grid_item_renderer(item)
|
||||||
if not isinstance(renderer, dict):
|
if not isinstance(renderer, dict):
|
||||||
|
@ -3268,6 +3423,39 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||||
continue
|
continue
|
||||||
yield self._extract_video(renderer)
|
yield self._extract_video(renderer)
|
||||||
|
|
||||||
|
def _extract_lockup_view_model(self, view_model):
|
||||||
|
content_id = view_model.get('contentId')
|
||||||
|
if not content_id:
|
||||||
|
return
|
||||||
|
content_type = view_model.get('contentType')
|
||||||
|
if content_type not in ('LOCKUP_CONTENT_TYPE_PLAYLIST', 'LOCKUP_CONTENT_TYPE_PODCAST'):
|
||||||
|
self.report_warning(
|
||||||
|
'Unsupported lockup view model content type "{0}"{1}'.format(content_type, bug_reports_message()), only_once=True)
|
||||||
|
return
|
||||||
|
return merge_dicts(self.url_result(
|
||||||
|
update_url_query('https://www.youtube.com/playlist', {'list': content_id}),
|
||||||
|
ie=YoutubeTabIE.ie_key(), video_id=content_id), {
|
||||||
|
'title': traverse_obj(view_model, (
|
||||||
|
'metadata', 'lockupMetadataViewModel', 'title', 'content', T(compat_str))),
|
||||||
|
'thumbnails': self._extract_thumbnails(view_model, (
|
||||||
|
'contentImage', 'collectionThumbnailViewModel', 'primaryThumbnail',
|
||||||
|
'thumbnailViewModel', 'image'), final_key='sources'),
|
||||||
|
})
|
||||||
|
|
||||||
|
def _extract_shorts_lockup_view_model(self, view_model):
|
||||||
|
content_id = traverse_obj(view_model, (
|
||||||
|
'onTap', 'innertubeCommand', 'reelWatchEndpoint', 'videoId',
|
||||||
|
T(lambda v: v if YoutubeIE.suitable(v) else None)))
|
||||||
|
if not content_id:
|
||||||
|
return
|
||||||
|
return merge_dicts(self.url_result(
|
||||||
|
content_id, ie=YoutubeIE.ie_key(), video_id=content_id), {
|
||||||
|
'title': traverse_obj(view_model, (
|
||||||
|
'overlayMetadata', 'primaryText', 'content', T(compat_str))),
|
||||||
|
'thumbnails': self._extract_thumbnails(
|
||||||
|
view_model, 'thumbnail', final_key='sources'),
|
||||||
|
})
|
||||||
|
|
||||||
def _video_entry(self, video_renderer):
|
def _video_entry(self, video_renderer):
|
||||||
video_id = video_renderer.get('videoId')
|
video_id = video_renderer.get('videoId')
|
||||||
if video_id:
|
if video_id:
|
||||||
|
@ -3314,10 +3502,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||||
yield entry
|
yield entry
|
||||||
|
|
||||||
def _rich_grid_entries(self, contents):
|
def _rich_grid_entries(self, contents):
|
||||||
for content in contents:
|
for content in traverse_obj(
|
||||||
content = traverse_obj(
|
contents, (Ellipsis, 'richItemRenderer', 'content'),
|
||||||
content, ('richItemRenderer', 'content'),
|
expected_type=dict):
|
||||||
expected_type=dict) or {}
|
|
||||||
video_renderer = traverse_obj(
|
video_renderer = traverse_obj(
|
||||||
content, 'videoRenderer', 'reelItemRenderer',
|
content, 'videoRenderer', 'reelItemRenderer',
|
||||||
expected_type=dict)
|
expected_type=dict)
|
||||||
|
@ -3325,6 +3512,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||||
entry = self._video_entry(video_renderer)
|
entry = self._video_entry(video_renderer)
|
||||||
if entry:
|
if entry:
|
||||||
yield entry
|
yield entry
|
||||||
|
# shorts item
|
||||||
|
shorts_lockup_view_model = content.get('shortsLockupViewModel')
|
||||||
|
if shorts_lockup_view_model:
|
||||||
|
entry = self._extract_shorts_lockup_view_model(shorts_lockup_view_model)
|
||||||
|
if entry:
|
||||||
|
yield entry
|
||||||
# playlist
|
# playlist
|
||||||
renderer = traverse_obj(
|
renderer = traverse_obj(
|
||||||
content, 'playlistRenderer', expected_type=dict) or {}
|
content, 'playlistRenderer', expected_type=dict) or {}
|
||||||
|
@ -3363,23 +3556,15 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||||
next_continuation = cls._extract_next_continuation_data(renderer)
|
next_continuation = cls._extract_next_continuation_data(renderer)
|
||||||
if next_continuation:
|
if next_continuation:
|
||||||
return next_continuation
|
return next_continuation
|
||||||
contents = []
|
for command in traverse_obj(renderer, (
|
||||||
for key in ('contents', 'items'):
|
('contents', 'items', 'rows'), Ellipsis, 'continuationItemRenderer',
|
||||||
contents.extend(try_get(renderer, lambda x: x[key], list) or [])
|
('continuationEndpoint', ('button', 'buttonRenderer', 'command')),
|
||||||
for content in contents:
|
(('commandExecutorCommand', 'commands', Ellipsis), None), T(dict))):
|
||||||
if not isinstance(content, dict):
|
continuation = traverse_obj(command, ('continuationCommand', 'token', T(compat_str)))
|
||||||
continue
|
|
||||||
continuation_ep = try_get(
|
|
||||||
content, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
|
|
||||||
dict)
|
|
||||||
if not continuation_ep:
|
|
||||||
continue
|
|
||||||
continuation = try_get(
|
|
||||||
continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
|
|
||||||
if not continuation:
|
if not continuation:
|
||||||
continue
|
continue
|
||||||
ctp = continuation_ep.get('clickTrackingParams')
|
ctp = command.get('clickTrackingParams')
|
||||||
return YoutubeTabIE._build_continuation_query(continuation, ctp)
|
return cls._build_continuation_query(continuation, ctp)
|
||||||
|
|
||||||
def _entries(self, tab, item_id, webpage):
|
def _entries(self, tab, item_id, webpage):
|
||||||
tab_content = try_get(tab, lambda x: x['content'], dict)
|
tab_content = try_get(tab, lambda x: x['content'], dict)
|
||||||
|
@ -3428,6 +3613,13 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||||
entry = self._video_entry(renderer)
|
entry = self._video_entry(renderer)
|
||||||
if entry:
|
if entry:
|
||||||
yield entry
|
yield entry
|
||||||
|
renderer = isr_content.get('richGridRenderer')
|
||||||
|
if renderer:
|
||||||
|
for from_ in self._rich_grid_entries(
|
||||||
|
traverse_obj(renderer, ('contents', Ellipsis, T(dict)))):
|
||||||
|
yield from_
|
||||||
|
continuation = self._extract_continuation(renderer)
|
||||||
|
continue
|
||||||
|
|
||||||
if not continuation:
|
if not continuation:
|
||||||
continuation = self._extract_continuation(is_renderer)
|
continuation = self._extract_continuation(is_renderer)
|
||||||
|
@ -3437,8 +3629,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||||
rich_grid_renderer = tab_content.get('richGridRenderer')
|
rich_grid_renderer = tab_content.get('richGridRenderer')
|
||||||
if not rich_grid_renderer:
|
if not rich_grid_renderer:
|
||||||
return
|
return
|
||||||
for entry in self._rich_grid_entries(rich_grid_renderer.get('contents') or []):
|
for from_ in self._rich_grid_entries(
|
||||||
yield entry
|
traverse_obj(rich_grid_renderer, ('contents', Ellipsis, T(dict)))):
|
||||||
|
yield from_
|
||||||
|
|
||||||
continuation = self._extract_continuation(rich_grid_renderer)
|
continuation = self._extract_continuation(rich_grid_renderer)
|
||||||
|
|
||||||
|
@ -3484,8 +3677,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||||
# Downloading page may result in intermittent 5xx HTTP error
|
# Downloading page may result in intermittent 5xx HTTP error
|
||||||
# that is usually worked around with a retry
|
# that is usually worked around with a retry
|
||||||
response = self._download_json(
|
response = self._download_json(
|
||||||
'https://www.youtube.com/youtubei/v1/browse?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
|
'https://www.youtube.com/youtubei/v1/browse',
|
||||||
None, 'Downloading page %d%s' % (page_num, ' (retry #%d)' % count if count else ''),
|
None, 'Downloading page %d%s' % (page_num, ' (retry #%d)' % count if count else ''),
|
||||||
|
query={
|
||||||
|
# 'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
|
||||||
|
'prettyPrint': 'false',
|
||||||
|
},
|
||||||
headers=headers, data=json.dumps(data).encode('utf8'))
|
headers=headers, data=json.dumps(data).encode('utf8'))
|
||||||
break
|
break
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
|
|
|
@ -1,10 +1,12 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import calendar
|
||||||
import itertools
|
import itertools
|
||||||
import json
|
import json
|
||||||
import operator
|
import operator
|
||||||
import re
|
import re
|
||||||
|
import time
|
||||||
|
|
||||||
from functools import update_wrapper, wraps
|
from functools import update_wrapper, wraps
|
||||||
|
|
||||||
|
@ -12,8 +14,10 @@ from .utils import (
|
||||||
error_to_compat_str,
|
error_to_compat_str,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
remove_quotes,
|
remove_quotes,
|
||||||
|
str_or_none,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
variadic,
|
variadic,
|
||||||
write_string,
|
write_string,
|
||||||
|
@ -150,6 +154,7 @@ def _js_to_primitive(v):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# more exact: yt-dlp/yt-dlp#12110
|
||||||
def _js_toString(v):
|
def _js_toString(v):
|
||||||
return (
|
return (
|
||||||
'undefined' if v is JS_Undefined
|
'undefined' if v is JS_Undefined
|
||||||
|
@ -158,7 +163,7 @@ def _js_toString(v):
|
||||||
else 'null' if v is None
|
else 'null' if v is None
|
||||||
# bool <= int: do this first
|
# bool <= int: do this first
|
||||||
else ('false', 'true')[v] if isinstance(v, bool)
|
else ('false', 'true')[v] if isinstance(v, bool)
|
||||||
else '{0:.7f}'.format(v).rstrip('.0') if isinstance(v, compat_numeric_types)
|
else re.sub(r'(?<=\d)\.?0*$', '', '{0:.7f}'.format(v)) if isinstance(v, compat_numeric_types)
|
||||||
else _js_to_primitive(v))
|
else _js_to_primitive(v))
|
||||||
|
|
||||||
|
|
||||||
|
@ -235,7 +240,7 @@ def _js_ternary(cndn, if_true=True, if_false=False):
|
||||||
def _js_unary_op(op):
|
def _js_unary_op(op):
|
||||||
|
|
||||||
@wraps_op(op)
|
@wraps_op(op)
|
||||||
def wrapped(_, a):
|
def wrapped(a, _):
|
||||||
return op(a)
|
return op(a)
|
||||||
|
|
||||||
return wrapped
|
return wrapped
|
||||||
|
@ -278,17 +283,6 @@ _OPERATORS = (
|
||||||
('**', _js_exp),
|
('**', _js_exp),
|
||||||
)
|
)
|
||||||
|
|
||||||
_COMP_OPERATORS = (
|
|
||||||
('===', _js_id_op(operator.is_)),
|
|
||||||
('!==', _js_id_op(operator.is_not)),
|
|
||||||
('==', _js_eq),
|
|
||||||
('!=', _js_neq),
|
|
||||||
('<=', _js_comp_op(operator.le)),
|
|
||||||
('>=', _js_comp_op(operator.ge)),
|
|
||||||
('<', _js_comp_op(operator.lt)),
|
|
||||||
('>', _js_comp_op(operator.gt)),
|
|
||||||
)
|
|
||||||
|
|
||||||
_LOG_OPERATORS = (
|
_LOG_OPERATORS = (
|
||||||
('|', _js_bit_op(operator.or_)),
|
('|', _js_bit_op(operator.or_)),
|
||||||
('^', _js_bit_op(operator.xor)),
|
('^', _js_bit_op(operator.xor)),
|
||||||
|
@ -305,13 +299,27 @@ _SC_OPERATORS = (
|
||||||
_UNARY_OPERATORS_X = (
|
_UNARY_OPERATORS_X = (
|
||||||
('void', _js_unary_op(lambda _: JS_Undefined)),
|
('void', _js_unary_op(lambda _: JS_Undefined)),
|
||||||
('typeof', _js_unary_op(_js_typeof)),
|
('typeof', _js_unary_op(_js_typeof)),
|
||||||
|
# avoid functools.partial here since Py2 update_wrapper(partial) -> no __module__
|
||||||
|
('!', _js_unary_op(lambda x: _js_ternary(x, if_true=False, if_false=True))),
|
||||||
)
|
)
|
||||||
|
|
||||||
_OPERATOR_RE = '|'.join(map(lambda x: re.escape(x[0]), _OPERATORS + _LOG_OPERATORS))
|
_COMP_OPERATORS = (
|
||||||
|
('===', _js_id_op(operator.is_)),
|
||||||
|
('!==', _js_id_op(operator.is_not)),
|
||||||
|
('==', _js_eq),
|
||||||
|
('!=', _js_neq),
|
||||||
|
('<=', _js_comp_op(operator.le)),
|
||||||
|
('>=', _js_comp_op(operator.ge)),
|
||||||
|
('<', _js_comp_op(operator.lt)),
|
||||||
|
('>', _js_comp_op(operator.gt)),
|
||||||
|
)
|
||||||
|
|
||||||
|
_OPERATOR_RE = '|'.join(map(lambda x: re.escape(x[0]), _OPERATORS + _LOG_OPERATORS + _SC_OPERATORS))
|
||||||
|
|
||||||
_NAME_RE = r'[a-zA-Z_$][\w$]*'
|
_NAME_RE = r'[a-zA-Z_$][\w$]*'
|
||||||
_MATCHING_PARENS = dict(zip(*zip('()', '{}', '[]')))
|
_MATCHING_PARENS = dict(zip(*zip('()', '{}', '[]')))
|
||||||
_QUOTES = '\'"/'
|
_QUOTES = '\'"/'
|
||||||
|
_NESTED_BRACKETS = r'[^[\]]+(?:\[[^[\]]+(?:\[[^\]]+\])?\])?'
|
||||||
|
|
||||||
|
|
||||||
class JS_Break(ExtractorError):
|
class JS_Break(ExtractorError):
|
||||||
|
@ -348,7 +356,7 @@ class LocalNameSpace(ChainMap):
|
||||||
raise NotImplementedError('Deleting is not supported')
|
raise NotImplementedError('Deleting is not supported')
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return 'LocalNameSpace%s' % (self.maps, )
|
return 'LocalNameSpace({0!r})'.format(self.maps)
|
||||||
|
|
||||||
|
|
||||||
class Debugger(object):
|
class Debugger(object):
|
||||||
|
@ -369,6 +377,9 @@ class Debugger(object):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def wrap_interpreter(cls, f):
|
def wrap_interpreter(cls, f):
|
||||||
|
if not cls.ENABLED:
|
||||||
|
return f
|
||||||
|
|
||||||
@wraps(f)
|
@wraps(f)
|
||||||
def interpret_statement(self, stmt, local_vars, allow_recursion, *args, **kwargs):
|
def interpret_statement(self, stmt, local_vars, allow_recursion, *args, **kwargs):
|
||||||
if cls.ENABLED and stmt.strip():
|
if cls.ENABLED and stmt.strip():
|
||||||
|
@ -404,11 +415,22 @@ class JSInterpreter(object):
|
||||||
class Exception(ExtractorError):
|
class Exception(ExtractorError):
|
||||||
def __init__(self, msg, *args, **kwargs):
|
def __init__(self, msg, *args, **kwargs):
|
||||||
expr = kwargs.pop('expr', None)
|
expr = kwargs.pop('expr', None)
|
||||||
|
msg = str_or_none(msg, default='"None"')
|
||||||
if expr is not None:
|
if expr is not None:
|
||||||
msg = '{0} in: {1!r:.100}'.format(msg.rstrip(), expr)
|
msg = '{0} in: {1!r:.100}'.format(msg.rstrip(), expr)
|
||||||
super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs)
|
super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs)
|
||||||
|
|
||||||
class JS_RegExp(object):
|
class JS_Object(object):
|
||||||
|
def __getitem__(self, key):
|
||||||
|
if hasattr(self, key):
|
||||||
|
return getattr(self, key)
|
||||||
|
raise KeyError(key)
|
||||||
|
|
||||||
|
def dump(self):
|
||||||
|
"""Serialise the instance"""
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
class JS_RegExp(JS_Object):
|
||||||
RE_FLAGS = {
|
RE_FLAGS = {
|
||||||
# special knowledge: Python's re flags are bitmask values, current max 128
|
# special knowledge: Python's re flags are bitmask values, current max 128
|
||||||
# invent new bitmask values well above that for literal parsing
|
# invent new bitmask values well above that for literal parsing
|
||||||
|
@ -429,15 +451,24 @@ class JSInterpreter(object):
|
||||||
def __init__(self, pattern_txt, flags=0):
|
def __init__(self, pattern_txt, flags=0):
|
||||||
if isinstance(flags, compat_str):
|
if isinstance(flags, compat_str):
|
||||||
flags, _ = self.regex_flags(flags)
|
flags, _ = self.regex_flags(flags)
|
||||||
# First, avoid https://github.com/python/cpython/issues/74534
|
|
||||||
self.__self = None
|
self.__self = None
|
||||||
self.__pattern_txt = pattern_txt.replace('[[', r'[\[')
|
pattern_txt = str_or_none(pattern_txt) or '(?:)'
|
||||||
|
# escape unintended embedded flags
|
||||||
|
pattern_txt = re.sub(
|
||||||
|
r'(\(\?)([aiLmsux]*)(-[imsx]+:|(?<!\?)\))',
|
||||||
|
lambda m: ''.join(
|
||||||
|
(re.escape(m.group(1)), m.group(2), re.escape(m.group(3)))
|
||||||
|
if m.group(3) == ')'
|
||||||
|
else ('(?:', m.group(2), m.group(3))),
|
||||||
|
pattern_txt)
|
||||||
|
# Avoid https://github.com/python/cpython/issues/74534
|
||||||
|
self.source = pattern_txt.replace('[[', r'[\[')
|
||||||
self.__flags = flags
|
self.__flags = flags
|
||||||
|
|
||||||
def __instantiate(self):
|
def __instantiate(self):
|
||||||
if self.__self:
|
if self.__self:
|
||||||
return
|
return
|
||||||
self.__self = re.compile(self.__pattern_txt, self.__flags)
|
self.__self = re.compile(self.source, self.__flags)
|
||||||
# Thx: https://stackoverflow.com/questions/44773522/setattr-on-python2-sre-sre-pattern
|
# Thx: https://stackoverflow.com/questions/44773522/setattr-on-python2-sre-sre-pattern
|
||||||
for name in dir(self.__self):
|
for name in dir(self.__self):
|
||||||
# Only these? Obviously __class__, __init__.
|
# Only these? Obviously __class__, __init__.
|
||||||
|
@ -445,16 +476,15 @@ class JSInterpreter(object):
|
||||||
# that can't be setattr'd but also can't need to be copied.
|
# that can't be setattr'd but also can't need to be copied.
|
||||||
if name in ('__class__', '__init__', '__weakref__'):
|
if name in ('__class__', '__init__', '__weakref__'):
|
||||||
continue
|
continue
|
||||||
setattr(self, name, getattr(self.__self, name))
|
if name == 'flags':
|
||||||
|
setattr(self, name, getattr(self.__self, name, self.__flags))
|
||||||
|
else:
|
||||||
|
setattr(self, name, getattr(self.__self, name))
|
||||||
|
|
||||||
def __getattr__(self, name):
|
def __getattr__(self, name):
|
||||||
self.__instantiate()
|
self.__instantiate()
|
||||||
# make Py 2.6 conform to its lying documentation
|
if name == 'pattern':
|
||||||
if name == 'flags':
|
self.pattern = self.source
|
||||||
self.flags = self.__flags
|
|
||||||
return self.flags
|
|
||||||
elif name == 'pattern':
|
|
||||||
self.pattern = self.__pattern_txt
|
|
||||||
return self.pattern
|
return self.pattern
|
||||||
elif hasattr(self.__self, name):
|
elif hasattr(self.__self, name):
|
||||||
v = getattr(self.__self, name)
|
v = getattr(self.__self, name)
|
||||||
|
@ -462,6 +492,26 @@ class JSInterpreter(object):
|
||||||
return v
|
return v
|
||||||
elif name in ('groupindex', 'groups'):
|
elif name in ('groupindex', 'groups'):
|
||||||
return 0 if name == 'groupindex' else {}
|
return 0 if name == 'groupindex' else {}
|
||||||
|
else:
|
||||||
|
flag_attrs = ( # order by 2nd elt
|
||||||
|
('hasIndices', 'd'),
|
||||||
|
('global', 'g'),
|
||||||
|
('ignoreCase', 'i'),
|
||||||
|
('multiline', 'm'),
|
||||||
|
('dotAll', 's'),
|
||||||
|
('unicode', 'u'),
|
||||||
|
('unicodeSets', 'v'),
|
||||||
|
('sticky', 'y'),
|
||||||
|
)
|
||||||
|
for k, c in flag_attrs:
|
||||||
|
if name == k:
|
||||||
|
return bool(self.RE_FLAGS[c] & self.__flags)
|
||||||
|
else:
|
||||||
|
if name == 'flags':
|
||||||
|
return ''.join(
|
||||||
|
(c if self.RE_FLAGS[c] & self.__flags else '')
|
||||||
|
for _, c in flag_attrs)
|
||||||
|
|
||||||
raise AttributeError('{0} has no attribute named {1}'.format(self, name))
|
raise AttributeError('{0} has no attribute named {1}'.format(self, name))
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -475,6 +525,85 @@ class JSInterpreter(object):
|
||||||
flags |= cls.RE_FLAGS[ch]
|
flags |= cls.RE_FLAGS[ch]
|
||||||
return flags, expr[idx + 1:]
|
return flags, expr[idx + 1:]
|
||||||
|
|
||||||
|
def dump(self):
|
||||||
|
return '(/{0}/{1})'.format(
|
||||||
|
re.sub(r'(?<!\\)/', r'\/', self.source),
|
||||||
|
self.flags)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def escape(string_):
|
||||||
|
return re.escape(string_)
|
||||||
|
|
||||||
|
class JS_Date(JS_Object):
|
||||||
|
_t = None
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def __ymd_etc(*args, **kw_is_utc):
|
||||||
|
# args: year, monthIndex, day, hours, minutes, seconds, milliseconds
|
||||||
|
is_utc = kw_is_utc.get('is_utc', False)
|
||||||
|
|
||||||
|
args = list(args[:7])
|
||||||
|
args += [0] * (9 - len(args))
|
||||||
|
args[1] += 1 # month 0..11 -> 1..12
|
||||||
|
ms = args[6]
|
||||||
|
for i in range(6, 9):
|
||||||
|
args[i] = -1 # don't know
|
||||||
|
if is_utc:
|
||||||
|
args[-1] = 1
|
||||||
|
# TODO: [MDN] When a segment overflows or underflows its expected
|
||||||
|
# range, it usually "carries over to" or "borrows from" the higher segment.
|
||||||
|
try:
|
||||||
|
mktime = calendar.timegm if is_utc else time.mktime
|
||||||
|
return mktime(time.struct_time(args)) * 1000 + ms
|
||||||
|
except (OverflowError, ValueError):
|
||||||
|
return None
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def UTC(cls, *args):
|
||||||
|
t = cls.__ymd_etc(*args, is_utc=True)
|
||||||
|
return _NaN if t is None else t
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def parse(date_str, **kw_is_raw):
|
||||||
|
is_raw = kw_is_raw.get('is_raw', False)
|
||||||
|
|
||||||
|
t = unified_timestamp(str_or_none(date_str), False)
|
||||||
|
return int(t * 1000) if t is not None else t if is_raw else _NaN
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def now(**kw_is_raw):
|
||||||
|
is_raw = kw_is_raw.get('is_raw', False)
|
||||||
|
|
||||||
|
t = time.time()
|
||||||
|
return int(t * 1000) if t is not None else t if is_raw else _NaN
|
||||||
|
|
||||||
|
def __init__(self, *args):
|
||||||
|
if not args:
|
||||||
|
args = [self.now(is_raw=True)]
|
||||||
|
if len(args) == 1:
|
||||||
|
if isinstance(args[0], JSInterpreter.JS_Date):
|
||||||
|
self._t = int_or_none(args[0].valueOf(), default=None)
|
||||||
|
else:
|
||||||
|
arg_type = _js_typeof(args[0])
|
||||||
|
if arg_type == 'string':
|
||||||
|
self._t = self.parse(args[0], is_raw=True)
|
||||||
|
elif arg_type == 'number':
|
||||||
|
self._t = int(args[0])
|
||||||
|
else:
|
||||||
|
self._t = self.__ymd_etc(*args)
|
||||||
|
|
||||||
|
def toString(self):
|
||||||
|
try:
|
||||||
|
return time.strftime('%a %b %0d %Y %H:%M:%S %Z%z', self._t).rstrip()
|
||||||
|
except TypeError:
|
||||||
|
return "Invalid Date"
|
||||||
|
|
||||||
|
def valueOf(self):
|
||||||
|
return _NaN if self._t is None else self._t
|
||||||
|
|
||||||
|
def dump(self):
|
||||||
|
return '(new Date({0}))'.format(self.toString())
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def __op_chars(cls):
|
def __op_chars(cls):
|
||||||
op_chars = set(';,[')
|
op_chars = set(';,[')
|
||||||
|
@ -578,59 +707,7 @@ class JSInterpreter(object):
|
||||||
_SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS, _UNARY_OPERATORS_X))
|
_SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS, _UNARY_OPERATORS_X))
|
||||||
return _cached
|
return _cached
|
||||||
|
|
||||||
def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
|
def _separate_at_op(self, expr, max_split=None):
|
||||||
if op in ('||', '&&'):
|
|
||||||
if (op == '&&') ^ _js_ternary(left_val):
|
|
||||||
return left_val # short circuiting
|
|
||||||
elif op == '??':
|
|
||||||
if left_val not in (None, JS_Undefined):
|
|
||||||
return left_val
|
|
||||||
elif op == '?':
|
|
||||||
right_expr = _js_ternary(left_val, *self._separate(right_expr, ':', 1))
|
|
||||||
|
|
||||||
right_val = self.interpret_expression(right_expr, local_vars, allow_recursion)
|
|
||||||
opfunc = op and next((v for k, v in self._all_operators() if k == op), None)
|
|
||||||
if not opfunc:
|
|
||||||
return right_val
|
|
||||||
|
|
||||||
try:
|
|
||||||
# print('Eval:', opfunc.__name__, left_val, right_val)
|
|
||||||
return opfunc(left_val, right_val)
|
|
||||||
except Exception as e:
|
|
||||||
raise self.Exception('Failed to evaluate {left_val!r:.50} {op} {right_val!r:.50}'.format(**locals()), expr, cause=e)
|
|
||||||
|
|
||||||
def _index(self, obj, idx, allow_undefined=True):
|
|
||||||
if idx == 'length' and isinstance(obj, list):
|
|
||||||
return len(obj)
|
|
||||||
try:
|
|
||||||
return obj[int(idx)] if isinstance(obj, list) else obj[compat_str(idx)]
|
|
||||||
except (TypeError, KeyError, IndexError) as e:
|
|
||||||
if allow_undefined:
|
|
||||||
# when is not allowed?
|
|
||||||
return JS_Undefined
|
|
||||||
raise self.Exception('Cannot get index {idx!r:.100}'.format(**locals()), expr=repr(obj), cause=e)
|
|
||||||
|
|
||||||
def _dump(self, obj, namespace):
|
|
||||||
try:
|
|
||||||
return json.dumps(obj)
|
|
||||||
except TypeError:
|
|
||||||
return self._named_object(namespace, obj)
|
|
||||||
|
|
||||||
# used below
|
|
||||||
_VAR_RET_THROW_RE = re.compile(r'''(?x)
|
|
||||||
(?:(?P<var>var|const|let)\s+|(?P<ret>return)(?:\s+|(?=["'])|$)|(?P<throw>throw)\s+)
|
|
||||||
''')
|
|
||||||
_COMPOUND_RE = re.compile(r'''(?x)
|
|
||||||
(?P<try>try)\s*\{|
|
|
||||||
(?P<if>if)\s*\(|
|
|
||||||
(?P<switch>switch)\s*\(|
|
|
||||||
(?P<for>for)\s*\(|
|
|
||||||
(?P<while>while)\s*\(
|
|
||||||
''')
|
|
||||||
_FINALLY_RE = re.compile(r'finally\s*\{')
|
|
||||||
_SWITCH_RE = re.compile(r'switch\s*\(')
|
|
||||||
|
|
||||||
def handle_operators(self, expr, local_vars, allow_recursion):
|
|
||||||
|
|
||||||
for op, _ in self._all_operators():
|
for op, _ in self._all_operators():
|
||||||
# hackety: </> have higher priority than <</>>, but don't confuse them
|
# hackety: </> have higher priority than <</>>, but don't confuse them
|
||||||
|
@ -658,23 +735,98 @@ class JSInterpreter(object):
|
||||||
if separated[-1][-1:] in self.OP_CHARS:
|
if separated[-1][-1:] in self.OP_CHARS:
|
||||||
right_expr = separated.pop() + right_expr
|
right_expr = separated.pop() + right_expr
|
||||||
# hanging op at end of left => unary + (strip) or - (push right)
|
# hanging op at end of left => unary + (strip) or - (push right)
|
||||||
left_val = separated[-1] if separated else ''
|
separated.append(right_expr)
|
||||||
for dm_op in ('*', '%', '/', '**'):
|
dm_ops = ('*', '%', '/', '**')
|
||||||
bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim))
|
dm_chars = set(''.join(dm_ops))
|
||||||
if len(bodmas) > 1 and not bodmas[-1].strip():
|
|
||||||
expr = op.join(separated) + op + right_expr
|
|
||||||
if len(separated) > 1:
|
|
||||||
separated.pop()
|
|
||||||
right_expr = op.join((left_val, right_expr))
|
|
||||||
else:
|
|
||||||
separated = [op.join((left_val, right_expr))]
|
|
||||||
right_expr = None
|
|
||||||
break
|
|
||||||
if right_expr is None:
|
|
||||||
continue
|
|
||||||
|
|
||||||
left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion)
|
def yield_terms(s):
|
||||||
return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), True
|
skip = False
|
||||||
|
for i, term in enumerate(s[:-1]):
|
||||||
|
if skip:
|
||||||
|
skip = False
|
||||||
|
continue
|
||||||
|
if not (dm_chars & set(term)):
|
||||||
|
yield term
|
||||||
|
continue
|
||||||
|
for dm_op in dm_ops:
|
||||||
|
bodmas = list(self._separate(term, dm_op, skip_delims=skip_delim))
|
||||||
|
if len(bodmas) > 1 and not bodmas[-1].strip():
|
||||||
|
bodmas[-1] = (op if op == '-' else '') + s[i + 1]
|
||||||
|
yield dm_op.join(bodmas)
|
||||||
|
skip = True
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
if term:
|
||||||
|
yield term
|
||||||
|
|
||||||
|
if not skip and s[-1]:
|
||||||
|
yield s[-1]
|
||||||
|
|
||||||
|
separated = list(yield_terms(separated))
|
||||||
|
right_expr = separated.pop() if len(separated) > 1 else None
|
||||||
|
expr = op.join(separated)
|
||||||
|
if right_expr is None:
|
||||||
|
continue
|
||||||
|
return op, separated, right_expr
|
||||||
|
|
||||||
|
def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
|
||||||
|
if op in ('||', '&&'):
|
||||||
|
if (op == '&&') ^ _js_ternary(left_val):
|
||||||
|
return left_val # short circuiting
|
||||||
|
elif op == '??':
|
||||||
|
if left_val not in (None, JS_Undefined):
|
||||||
|
return left_val
|
||||||
|
elif op == '?':
|
||||||
|
right_expr = _js_ternary(left_val, *self._separate(right_expr, ':', 1))
|
||||||
|
|
||||||
|
right_val = self.interpret_expression(right_expr, local_vars, allow_recursion) if right_expr else left_val
|
||||||
|
opfunc = op and next((v for k, v in self._all_operators() if k == op), None)
|
||||||
|
if not opfunc:
|
||||||
|
return right_val
|
||||||
|
|
||||||
|
try:
|
||||||
|
# print('Eval:', opfunc.__name__, left_val, right_val)
|
||||||
|
return opfunc(left_val, right_val)
|
||||||
|
except Exception as e:
|
||||||
|
raise self.Exception('Failed to evaluate {left_val!r:.50} {op} {right_val!r:.50}'.format(**locals()), expr, cause=e)
|
||||||
|
|
||||||
|
def _index(self, obj, idx, allow_undefined=None):
|
||||||
|
if idx == 'length' and isinstance(obj, list):
|
||||||
|
return len(obj)
|
||||||
|
try:
|
||||||
|
return obj[int(idx)] if isinstance(obj, list) else obj[compat_str(idx)]
|
||||||
|
except (TypeError, KeyError, IndexError, ValueError) as e:
|
||||||
|
# allow_undefined is None gives correct behaviour
|
||||||
|
if allow_undefined or (
|
||||||
|
allow_undefined is None and not isinstance(e, TypeError)):
|
||||||
|
return JS_Undefined
|
||||||
|
raise self.Exception('Cannot get index {idx!r:.100}'.format(**locals()), expr=repr(obj), cause=e)
|
||||||
|
|
||||||
|
def _dump(self, obj, namespace):
|
||||||
|
if obj is JS_Undefined:
|
||||||
|
return 'undefined'
|
||||||
|
try:
|
||||||
|
return json.dumps(obj)
|
||||||
|
except TypeError:
|
||||||
|
return self._named_object(namespace, obj)
|
||||||
|
|
||||||
|
# used below
|
||||||
|
_VAR_RET_THROW_RE = re.compile(r'''(?x)
|
||||||
|
(?:(?P<var>var|const|let)\s+|(?P<ret>return)(?:\s+|(?=["'])|$)|(?P<throw>throw)\s+)
|
||||||
|
''')
|
||||||
|
_COMPOUND_RE = re.compile(r'''(?x)
|
||||||
|
(?P<try>try)\s*\{|
|
||||||
|
(?P<if>if)\s*\(|
|
||||||
|
(?P<switch>switch)\s*\(|
|
||||||
|
(?P<for>for)\s*\(|
|
||||||
|
(?P<while>while)\s*\(
|
||||||
|
''')
|
||||||
|
_FINALLY_RE = re.compile(r'finally\s*\{')
|
||||||
|
_SWITCH_RE = re.compile(r'switch\s*\(')
|
||||||
|
|
||||||
|
def _eval_operator(self, op, left_expr, right_expr, expr, local_vars, allow_recursion):
|
||||||
|
left_val = self.interpret_expression(left_expr, local_vars, allow_recursion)
|
||||||
|
return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion)
|
||||||
|
|
||||||
@Debugger.wrap_interpreter
|
@Debugger.wrap_interpreter
|
||||||
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
|
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
|
||||||
|
@ -715,7 +867,7 @@ class JSInterpreter(object):
|
||||||
|
|
||||||
new_kw, _, obj = expr.partition('new ')
|
new_kw, _, obj = expr.partition('new ')
|
||||||
if not new_kw:
|
if not new_kw:
|
||||||
for klass, konstr in (('Date', lambda x: int(unified_timestamp(x, False) * 1000)),
|
for klass, konstr in (('Date', lambda *x: self.JS_Date(*x).valueOf()),
|
||||||
('RegExp', self.JS_RegExp),
|
('RegExp', self.JS_RegExp),
|
||||||
('Error', self.Exception)):
|
('Error', self.Exception)):
|
||||||
if not obj.startswith(klass + '('):
|
if not obj.startswith(klass + '('):
|
||||||
|
@ -730,15 +882,19 @@ class JSInterpreter(object):
|
||||||
else:
|
else:
|
||||||
raise self.Exception('Unsupported object {obj:.100}'.format(**locals()), expr=expr)
|
raise self.Exception('Unsupported object {obj:.100}'.format(**locals()), expr=expr)
|
||||||
|
|
||||||
|
# apply unary operators (see new above)
|
||||||
for op, _ in _UNARY_OPERATORS_X:
|
for op, _ in _UNARY_OPERATORS_X:
|
||||||
if not expr.startswith(op):
|
if not expr.startswith(op):
|
||||||
continue
|
continue
|
||||||
operand = expr[len(op):]
|
operand = expr[len(op):]
|
||||||
if not operand or operand[0] != ' ':
|
if not operand or (op.isalpha() and operand[0] != ' '):
|
||||||
continue
|
continue
|
||||||
op_result = self.handle_operators(expr, local_vars, allow_recursion)
|
separated = self._separate_at_op(operand, max_split=1)
|
||||||
if op_result:
|
if separated:
|
||||||
return op_result[0], should_return
|
next_op, separated, right_expr = separated
|
||||||
|
separated.append(right_expr)
|
||||||
|
operand = next_op.join(separated)
|
||||||
|
return self._eval_operator(op, operand, '', expr, local_vars, allow_recursion), should_return
|
||||||
|
|
||||||
if expr.startswith('{'):
|
if expr.startswith('{'):
|
||||||
inner, outer = self._separate_at_paren(expr)
|
inner, outer = self._separate_at_paren(expr)
|
||||||
|
@ -933,15 +1089,18 @@ class JSInterpreter(object):
|
||||||
|
|
||||||
m = re.match(r'''(?x)
|
m = re.match(r'''(?x)
|
||||||
(?P<assign>
|
(?P<assign>
|
||||||
(?P<out>{_NAME_RE})(?:\[(?P<out_idx>(?:.+?\]\s*\[)*.+?)\])?\s*
|
(?P<out>{_NAME_RE})(?P<out_idx>(?:\[{_NESTED_BRACKETS}\])+)?\s*
|
||||||
(?P<op>{_OPERATOR_RE})?
|
(?P<op>{_OPERATOR_RE})?
|
||||||
=(?!=)(?P<expr>.*)$
|
=(?!=)(?P<expr>.*)$
|
||||||
)|(?P<return>
|
)|(?P<return>
|
||||||
(?!if|return|true|false|null|undefined|NaN|Infinity)(?P<name>{_NAME_RE})$
|
(?!if|return|true|false|null|undefined|NaN|Infinity)(?P<name>{_NAME_RE})$
|
||||||
)|(?P<indexing>
|
|
||||||
(?P<in>{_NAME_RE})\[(?P<in_idx>(?:.+?\]\s*\[)*.+?)\]$
|
|
||||||
)|(?P<attribute>
|
)|(?P<attribute>
|
||||||
(?P<var>{_NAME_RE})(?:(?P<nullish>\?)?\.(?P<member>[^(]+)|\[(?P<member2>[^\]]+)\])\s*
|
(?P<var>{_NAME_RE})(?:
|
||||||
|
(?P<nullish>\?)?\.(?P<member>[^(]+)|
|
||||||
|
\[(?P<member2>{_NESTED_BRACKETS})\]
|
||||||
|
)\s*
|
||||||
|
)|(?P<indexing>
|
||||||
|
(?P<in>{_NAME_RE})(?P<in_idx>\[.+\])$
|
||||||
)|(?P<function>
|
)|(?P<function>
|
||||||
(?P<fname>{_NAME_RE})\((?P<args>.*)\)$
|
(?P<fname>{_NAME_RE})\((?P<args>.*)\)$
|
||||||
)'''.format(**globals()), expr)
|
)'''.format(**globals()), expr)
|
||||||
|
@ -956,13 +1115,18 @@ class JSInterpreter(object):
|
||||||
elif left_val in (None, JS_Undefined):
|
elif left_val in (None, JS_Undefined):
|
||||||
raise self.Exception('Cannot index undefined variable ' + m.group('out'), expr=expr)
|
raise self.Exception('Cannot index undefined variable ' + m.group('out'), expr=expr)
|
||||||
|
|
||||||
indexes = re.split(r'\]\s*\[', m.group('out_idx'))
|
indexes = md['out_idx']
|
||||||
for i, idx in enumerate(indexes, 1):
|
while indexes:
|
||||||
|
idx, indexes = self._separate_at_paren(indexes)
|
||||||
idx = self.interpret_expression(idx, local_vars, allow_recursion)
|
idx = self.interpret_expression(idx, local_vars, allow_recursion)
|
||||||
if i < len(indexes):
|
if indexes:
|
||||||
left_val = self._index(left_val, idx)
|
left_val = self._index(left_val, idx)
|
||||||
if isinstance(idx, float):
|
if isinstance(idx, float):
|
||||||
idx = int(idx)
|
idx = int(idx)
|
||||||
|
if isinstance(left_val, list) and len(left_val) <= int_or_none(idx, default=-1):
|
||||||
|
# JS Array is a sparsely assignable list
|
||||||
|
# TODO: handle extreme sparsity without memory bloat, eg using auxiliary dict
|
||||||
|
left_val.extend((idx - len(left_val) + 1) * [JS_Undefined])
|
||||||
left_val[idx] = self._operator(
|
left_val[idx] = self._operator(
|
||||||
m.group('op'), self._index(left_val, idx) if m.group('op') else None,
|
m.group('op'), self._index(left_val, idx) if m.group('op') else None,
|
||||||
m.group('expr'), expr, local_vars, allow_recursion)
|
m.group('expr'), expr, local_vars, allow_recursion)
|
||||||
|
@ -1000,14 +1164,17 @@ class JSInterpreter(object):
|
||||||
|
|
||||||
if md.get('indexing'):
|
if md.get('indexing'):
|
||||||
val = local_vars[m.group('in')]
|
val = local_vars[m.group('in')]
|
||||||
for idx in re.split(r'\]\s*\[', m.group('in_idx')):
|
indexes = m.group('in_idx')
|
||||||
|
while indexes:
|
||||||
|
idx, indexes = self._separate_at_paren(indexes)
|
||||||
idx = self.interpret_expression(idx, local_vars, allow_recursion)
|
idx = self.interpret_expression(idx, local_vars, allow_recursion)
|
||||||
val = self._index(val, idx)
|
val = self._index(val, idx)
|
||||||
return val, should_return
|
return val, should_return
|
||||||
|
|
||||||
op_result = self.handle_operators(expr, local_vars, allow_recursion)
|
separated = self._separate_at_op(expr)
|
||||||
if op_result:
|
if separated:
|
||||||
return op_result[0], should_return
|
op, separated, right_expr = separated
|
||||||
|
return self._eval_operator(op, op.join(separated), right_expr, expr, local_vars, allow_recursion), should_return
|
||||||
|
|
||||||
if md.get('attribute'):
|
if md.get('attribute'):
|
||||||
variable, member, nullish = m.group('var', 'member', 'nullish')
|
variable, member, nullish = m.group('var', 'member', 'nullish')
|
||||||
|
@ -1028,12 +1195,15 @@ class JSInterpreter(object):
|
||||||
def eval_method(variable, member):
|
def eval_method(variable, member):
|
||||||
if (variable, member) == ('console', 'debug'):
|
if (variable, member) == ('console', 'debug'):
|
||||||
if Debugger.ENABLED:
|
if Debugger.ENABLED:
|
||||||
Debugger.write(self.interpret_expression('[{}]'.format(arg_str), local_vars, allow_recursion))
|
Debugger.write(self.interpret_expression('[{0}]'.format(arg_str), local_vars, allow_recursion))
|
||||||
return
|
return
|
||||||
types = {
|
types = {
|
||||||
'String': compat_str,
|
'String': compat_str,
|
||||||
'Math': float,
|
'Math': float,
|
||||||
'Array': list,
|
'Array': list,
|
||||||
|
'Date': self.JS_Date,
|
||||||
|
'RegExp': self.JS_RegExp,
|
||||||
|
# 'Error': self.Exception, # has no std static methods
|
||||||
}
|
}
|
||||||
obj = local_vars.get(variable)
|
obj = local_vars.get(variable)
|
||||||
if obj in (JS_Undefined, None):
|
if obj in (JS_Undefined, None):
|
||||||
|
@ -1041,7 +1211,7 @@ class JSInterpreter(object):
|
||||||
if obj is JS_Undefined:
|
if obj is JS_Undefined:
|
||||||
try:
|
try:
|
||||||
if variable not in self._objects:
|
if variable not in self._objects:
|
||||||
self._objects[variable] = self.extract_object(variable)
|
self._objects[variable] = self.extract_object(variable, local_vars)
|
||||||
obj = self._objects[variable]
|
obj = self._objects[variable]
|
||||||
except self.Exception:
|
except self.Exception:
|
||||||
if not nullish:
|
if not nullish:
|
||||||
|
@ -1052,7 +1222,7 @@ class JSInterpreter(object):
|
||||||
|
|
||||||
# Member access
|
# Member access
|
||||||
if arg_str is None:
|
if arg_str is None:
|
||||||
return self._index(obj, member)
|
return self._index(obj, member, nullish)
|
||||||
|
|
||||||
# Function call
|
# Function call
|
||||||
argvals = [
|
argvals = [
|
||||||
|
@ -1086,6 +1256,8 @@ class JSInterpreter(object):
|
||||||
assertion(len(argvals) == 2, 'takes two arguments')
|
assertion(len(argvals) == 2, 'takes two arguments')
|
||||||
return argvals[0] ** argvals[1]
|
return argvals[0] ** argvals[1]
|
||||||
raise self.Exception('Unsupported Math method ' + member, expr=expr)
|
raise self.Exception('Unsupported Math method ' + member, expr=expr)
|
||||||
|
elif obj is self.JS_Date:
|
||||||
|
return getattr(obj, member)(*argvals)
|
||||||
|
|
||||||
if member == 'split':
|
if member == 'split':
|
||||||
assertion(len(argvals) <= 2, 'takes at most two arguments')
|
assertion(len(argvals) <= 2, 'takes at most two arguments')
|
||||||
|
@ -1126,9 +1298,10 @@ class JSInterpreter(object):
|
||||||
elif member == 'join':
|
elif member == 'join':
|
||||||
assertion(isinstance(obj, list), 'must be applied on a list')
|
assertion(isinstance(obj, list), 'must be applied on a list')
|
||||||
assertion(len(argvals) <= 1, 'takes at most one argument')
|
assertion(len(argvals) <= 1, 'takes at most one argument')
|
||||||
return (',' if len(argvals) == 0 else argvals[0]).join(
|
return (',' if len(argvals) == 0 or argvals[0] in (None, JS_Undefined)
|
||||||
('' if x in (None, JS_Undefined) else _js_toString(x))
|
else argvals[0]).join(
|
||||||
for x in obj)
|
('' if x in (None, JS_Undefined) else _js_toString(x))
|
||||||
|
for x in obj)
|
||||||
elif member == 'reverse':
|
elif member == 'reverse':
|
||||||
assertion(not argvals, 'does not take any arguments')
|
assertion(not argvals, 'does not take any arguments')
|
||||||
obj.reverse()
|
obj.reverse()
|
||||||
|
@ -1192,7 +1365,8 @@ class JSInterpreter(object):
|
||||||
assertion(len(argvals) == 2, 'takes exactly two arguments')
|
assertion(len(argvals) == 2, 'takes exactly two arguments')
|
||||||
# TODO: argvals[1] callable, other Py vs JS edge cases
|
# TODO: argvals[1] callable, other Py vs JS edge cases
|
||||||
if isinstance(argvals[0], self.JS_RegExp):
|
if isinstance(argvals[0], self.JS_RegExp):
|
||||||
count = 0 if argvals[0].flags & self.JS_RegExp.RE_FLAGS['g'] else 1
|
# access JS member with Py reserved name
|
||||||
|
count = 0 if self._index(argvals[0], 'global') else 1
|
||||||
assertion(member != 'replaceAll' or count == 0,
|
assertion(member != 'replaceAll' or count == 0,
|
||||||
'replaceAll must be called with a global RegExp')
|
'replaceAll must be called with a global RegExp')
|
||||||
return argvals[0].sub(argvals[1], obj, count=count)
|
return argvals[0].sub(argvals[1], obj, count=count)
|
||||||
|
@ -1233,7 +1407,7 @@ class JSInterpreter(object):
|
||||||
for v in self._separate(list_txt):
|
for v in self._separate(list_txt):
|
||||||
yield self.interpret_expression(v, local_vars, allow_recursion)
|
yield self.interpret_expression(v, local_vars, allow_recursion)
|
||||||
|
|
||||||
def extract_object(self, objname):
|
def extract_object(self, objname, *global_stack):
|
||||||
_FUNC_NAME_RE = r'''(?:{n}|"{n}"|'{n}')'''.format(n=_NAME_RE)
|
_FUNC_NAME_RE = r'''(?:{n}|"{n}"|'{n}')'''.format(n=_NAME_RE)
|
||||||
obj = {}
|
obj = {}
|
||||||
fields = next(filter(None, (
|
fields = next(filter(None, (
|
||||||
|
@ -1254,7 +1428,8 @@ class JSInterpreter(object):
|
||||||
fields):
|
fields):
|
||||||
argnames = self.build_arglist(f.group('args'))
|
argnames = self.build_arglist(f.group('args'))
|
||||||
name = remove_quotes(f.group('key'))
|
name = remove_quotes(f.group('key'))
|
||||||
obj[name] = function_with_repr(self.build_function(argnames, f.group('code')), 'F<{0}>'.format(name))
|
obj[name] = function_with_repr(
|
||||||
|
self.build_function(argnames, f.group('code'), *global_stack), 'F<{0}>'.format(name))
|
||||||
|
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
|
@ -1286,19 +1461,21 @@ class JSInterpreter(object):
|
||||||
code, _ = self._separate_at_paren(func_m.group('code')) # refine the match
|
code, _ = self._separate_at_paren(func_m.group('code')) # refine the match
|
||||||
return self.build_arglist(func_m.group('args')), code
|
return self.build_arglist(func_m.group('args')), code
|
||||||
|
|
||||||
def extract_function(self, funcname):
|
def extract_function(self, funcname, *global_stack):
|
||||||
return function_with_repr(
|
return function_with_repr(
|
||||||
self.extract_function_from_code(*self.extract_function_code(funcname)),
|
self.extract_function_from_code(*itertools.chain(
|
||||||
|
self.extract_function_code(funcname), global_stack)),
|
||||||
'F<%s>' % (funcname,))
|
'F<%s>' % (funcname,))
|
||||||
|
|
||||||
def extract_function_from_code(self, argnames, code, *global_stack):
|
def extract_function_from_code(self, argnames, code, *global_stack):
|
||||||
local_vars = {}
|
local_vars = {}
|
||||||
|
|
||||||
|
start = None
|
||||||
while True:
|
while True:
|
||||||
mobj = re.search(r'function\((?P<args>[^)]*)\)\s*{', code)
|
mobj = re.search(r'function\((?P<args>[^)]*)\)\s*{', code[start:])
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
break
|
break
|
||||||
start, body_start = mobj.span()
|
start, body_start = ((start or 0) + x for x in mobj.span())
|
||||||
body, remaining = self._separate_at_paren(code[body_start - 1:])
|
body, remaining = self._separate_at_paren(code[body_start - 1:])
|
||||||
name = self._named_object(local_vars, self.extract_function_from_code(
|
name = self._named_object(local_vars, self.extract_function_from_code(
|
||||||
[x.strip() for x in mobj.group('args').split(',')],
|
[x.strip() for x in mobj.group('args').split(',')],
|
||||||
|
|
|
@ -814,6 +814,11 @@ def parseOpts(overrideArguments=None):
|
||||||
'--no-post-overwrites',
|
'--no-post-overwrites',
|
||||||
action='store_true', dest='nopostoverwrites', default=False,
|
action='store_true', dest='nopostoverwrites', default=False,
|
||||||
help='Do not overwrite post-processed files; the post-processed files are overwritten by default')
|
help='Do not overwrite post-processed files; the post-processed files are overwritten by default')
|
||||||
|
postproc.add_option(
|
||||||
|
'--aac-to-mp3',
|
||||||
|
action='store_true', dest='aacToMp3', default=False,
|
||||||
|
help='Convert AAC files to MP3',
|
||||||
|
)
|
||||||
postproc.add_option(
|
postproc.add_option(
|
||||||
'--embed-subs',
|
'--embed-subs',
|
||||||
action='store_true', dest='embedsubtitles', default=False,
|
action='store_true', dest='embedsubtitles', default=False,
|
||||||
|
|
|
@ -2,6 +2,7 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
from .embedthumbnail import EmbedThumbnailPP
|
from .embedthumbnail import EmbedThumbnailPP
|
||||||
from .ffmpeg import (
|
from .ffmpeg import (
|
||||||
|
ConvertAACToMP3PP,
|
||||||
FFmpegPostProcessor,
|
FFmpegPostProcessor,
|
||||||
FFmpegEmbedSubtitlePP,
|
FFmpegEmbedSubtitlePP,
|
||||||
FFmpegExtractAudioPP,
|
FFmpegExtractAudioPP,
|
||||||
|
@ -23,6 +24,7 @@ def get_postprocessor(key):
|
||||||
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
|
'ConvertAACToMP3PP',
|
||||||
'EmbedThumbnailPP',
|
'EmbedThumbnailPP',
|
||||||
'ExecAfterDownloadPP',
|
'ExecAfterDownloadPP',
|
||||||
'FFmpegEmbedSubtitlePP',
|
'FFmpegEmbedSubtitlePP',
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
|
||||||
|
import logging
|
||||||
import os
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
|
@ -21,6 +22,9 @@ from ..utils import (
|
||||||
from ..compat import compat_open as open
|
from ..compat import compat_open as open
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger('soundcloudutil.downloader')
|
||||||
|
|
||||||
|
|
||||||
class EmbedThumbnailPPError(PostProcessingError):
|
class EmbedThumbnailPPError(PostProcessingError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -128,6 +132,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
|
||||||
os.remove(encodeFilename(filename))
|
os.remove(encodeFilename(filename))
|
||||||
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
|
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
|
||||||
else:
|
else:
|
||||||
raise EmbedThumbnailPPError('Only mp3 and m4a/mp4 are supported for thumbnail embedding for now.')
|
logger.warning('Only mp3 and m4a/mp4 are supported for thumbnail embedding for now.')
|
||||||
|
# raise EmbedThumbnailPPError('Only mp3 and m4a/mp4 are supported for thumbnail embedding for now.')
|
||||||
|
|
||||||
return [], info
|
return [], info
|
||||||
|
|
|
@ -4,7 +4,8 @@ import os
|
||||||
import subprocess
|
import subprocess
|
||||||
import time
|
import time
|
||||||
import re
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
from .common import AudioConversionError, PostProcessor
|
from .common import AudioConversionError, PostProcessor
|
||||||
|
|
||||||
|
@ -651,3 +652,26 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
|
||||||
}
|
}
|
||||||
|
|
||||||
return sub_filenames, info
|
return sub_filenames, info
|
||||||
|
|
||||||
|
|
||||||
|
class ConvertAACToMP3PP(FFmpegPostProcessor):
    """
    Custom post processor that converts .aac files to .mp3 files

    Downloads whose info dict reports any other 'ext' are passed through
    unchanged.
    """

    def run(self, info):
        """Re-encode an AAC download as MP3 and update `info` to match.

        info -- info dict of the downloaded file; 'ext' and 'filepath' are
                read and, when a conversion happens, rewritten to describe
                the new .mp3 file.

        Returns the tuple ([], info). The list is always empty because the
        source .aac file is removed here rather than being handed back to
        the caller.

        No type annotations or pathlib are used on purpose: variable
        annotations are a syntax error before Python 3.6 and subscripted
        builtins (dict[...], list[...]) fail before 3.9.
        """
        if info['ext'] != 'aac':
            return [], info

        aac_path = info['filepath']
        # Swap only the final extension (same result as
        # pathlib.Path.with_suffix('.mp3'), without normalising the path).
        mp3_path = os.path.splitext(aac_path)[0] + '.mp3'

        self._downloader.to_screen('[ffmpeg] Converting .aac to .mp3')
        options = [
            '-codec:a', 'libmp3lame',
            '-qscale:a', '0',
        ]
        self.run_ffmpeg(aac_path, mp3_path, options)
        # Only reached when run_ffmpeg() returned normally, so the source is
        # not deleted if the conversion raised.
        os.remove(aac_path)

        info['filepath'] = mp3_path
        info['ext'] = 'mp3'

        return [], info
|
||||||
|
|
|
@ -4204,12 +4204,16 @@ def lowercase_escape(s):
|
||||||
s)
|
s)
|
||||||
|
|
||||||
|
|
||||||
def escape_rfc3986(s):
|
def escape_rfc3986(s, safe=None):
|
||||||
"""Escape non-ASCII characters as suggested by RFC 3986"""
|
"""Escape non-ASCII characters as suggested by RFC 3986"""
|
||||||
if sys.version_info < (3, 0):
|
if sys.version_info < (3, 0):
|
||||||
s = _encode_compat_str(s, 'utf-8')
|
s = _encode_compat_str(s, 'utf-8')
|
||||||
|
if safe is not None:
|
||||||
|
safe = _encode_compat_str(safe, 'utf-8')
|
||||||
|
if safe is None:
|
||||||
|
safe = b"%/;:@&=+$,!~*'()?#[]"
|
||||||
# ensure unicode: after quoting, it can always be converted
|
# ensure unicode: after quoting, it can always be converted
|
||||||
return compat_str(compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]"))
|
return compat_str(compat_urllib_parse.quote(s, safe))
|
||||||
|
|
||||||
|
|
||||||
def escape_url(url):
|
def escape_url(url):
|
||||||
|
|
|
@ -1,3 +1,3 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2021.12.17'
|
__version__ = '2025.04.07'
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue