Merge branch 'master' into strip

This commit is contained in:
Connor Mason 2025-06-19 15:30:41 -07:00
commit e8d433c359
21 changed files with 1109 additions and 493 deletions

View file

@ -116,7 +116,7 @@ jobs:
strategy: strategy:
fail-fast: true fail-fast: true
matrix: matrix:
os: [ubuntu-20.04] os: [ubuntu-22.04]
python-version: ${{ fromJSON(needs.select.outputs.cpython-versions) }} python-version: ${{ fromJSON(needs.select.outputs.cpython-versions) }}
python-impl: [cpython] python-impl: [cpython]
ytdl-test-set: ${{ fromJSON(needs.select.outputs.test-set) }} ytdl-test-set: ${{ fromJSON(needs.select.outputs.test-set) }}
@ -133,12 +133,12 @@ jobs:
ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download' || 'nodownload' }} ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download' || 'nodownload' }}
run-tests-ext: bat run-tests-ext: bat
# jython # jython
- os: ubuntu-20.04 - os: ubuntu-22.04
python-version: 2.7 python-version: 2.7
python-impl: jython python-impl: jython
ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }} ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }}
run-tests-ext: sh run-tests-ext: sh
- os: ubuntu-20.04 - os: ubuntu-22.04
python-version: 2.7 python-version: 2.7
python-impl: jython python-impl: jython
ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download' || 'nodownload' }} ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download' || 'nodownload' }}
@ -160,7 +160,7 @@ jobs:
# NB may run apt-get install in Linux # NB may run apt-get install in Linux
uses: ytdl-org/setup-python@v1 uses: ytdl-org/setup-python@v1
env: env:
# Temporary workaround for Python 3.5 failures - May 2024 # Temporary (?) workaround for Python 3.5 failures - May 2024
PIP_TRUSTED_HOST: "pypi.python.org pypi.org files.pythonhosted.org" PIP_TRUSTED_HOST: "pypi.python.org pypi.org files.pythonhosted.org"
with: with:
python-version: ${{ matrix.python-version }} python-version: ${{ matrix.python-version }}
@ -240,7 +240,10 @@ jobs:
# install 2.7 # install 2.7
shell: bash shell: bash
run: | run: |
sudo apt-get install -y python2 python-is-python2 # Ubuntu 22.04 no longer has python-is-python2: fetch it
curl -L "http://launchpadlibrarian.net/474693132/python-is-python2_2.7.17-4_all.deb" -o python-is-python2.deb
sudo apt-get install -y python2
sudo dpkg --force-breaks -i python-is-python2.deb
echo "PYTHONHOME=/usr" >> "$GITHUB_ENV" echo "PYTHONHOME=/usr" >> "$GITHUB_ENV"
#-------- Python 2.6 -- #-------- Python 2.6 --
- name: Set up Python 2.6 environment - name: Set up Python 2.6 environment

View file

@ -33,8 +33,13 @@ py2exe_options = {
} }
# Get the version from youtube_dl/version.py without importing the package # Get the version from youtube_dl/version.py without importing the package
exec(compile(open('youtube_dl/version.py').read(), exec(
'youtube_dl/version.py', 'exec')) compile(
open('youtube_dl/version.py').read(),
'youtube_dl/version.py',
'exec',
)
)
DESCRIPTION = 'YouTube video downloader' DESCRIPTION = 'YouTube video downloader'
LONG_DESCRIPTION = 'Command-line program to download videos from YouTube.com and other video sites' LONG_DESCRIPTION = 'Command-line program to download videos from YouTube.com and other video sites'
@ -125,9 +130,6 @@ setup(
'Environment :: Console', 'Environment :: Console',
'License :: Public Domain', 'License :: Public Domain',
'Programming Language :: Python', 'Programming Language :: Python',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.2', 'Programming Language :: Python :: 3.2',
'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.3',

View file

@ -1,5 +1,5 @@
[tox] [tox]
envlist = py26,py27,py33,py34,py35 envlist = py33,py34,py35
[testenv] [testenv]
deps = deps =
nose nose

View file

@ -12,6 +12,7 @@ import io
import itertools import itertools
import json import json
import locale import locale
import logging
import operator import operator
import os import os
import platform import platform
@ -24,6 +25,8 @@ import time
import tokenize import tokenize
import traceback import traceback
import random import random
from typing import Any
from typing import cast
try: try:
from ssl import OPENSSL_VERSION from ssl import OPENSSL_VERSION
@ -130,6 +133,10 @@ from .version import __version__
if compat_os_name == 'nt': if compat_os_name == 'nt':
import ctypes import ctypes
# Module-level logger that YoutubeDL's output methods write to
logger = logging.getLogger('soundcloudutil.downloader')
# Splits a tagged message such as '[tag] text' or '[tag:subtag] text' into
# the named groups `tag`, `subtag` (optional) and `msg`
TAGGED_LOG_MSG_REGEX = re.compile(r'^\[(?P<tag>\w+)(:(?P<subtag>\w+))?\]\s*(?P<msg>.+)$')
def _catch_unsafe_file_extension(func): def _catch_unsafe_file_extension(func):
@functools.wraps(func) @functools.wraps(func)
@ -494,27 +501,66 @@ class YoutubeDL(object):
"""Add the progress hook (currently only for the file downloader)""" """Add the progress hook (currently only for the file downloader)"""
self._progress_hooks.append(ph) self._progress_hooks.append(ph)
def _write_string(self, s, out=None): def _bidi_workaround(self, message):
if not hasattr(self, '_output_channel'):
return message
assert hasattr(self, '_output_process')
assert isinstance(message, compat_str)
line_count = message.count('\n') + 1
self._output_process.stdin.write((message + '\n').encode('utf-8'))
self._output_process.stdin.flush()
res = ''.join(self._output_channel.readline().decode('utf-8')
for _ in range(line_count))
return res[:-len('\n')]
def to_screen(self, message, skip_eol: bool = False):
    """Write `message` to the screen unless quiet mode is enabled.

    Delegates to :meth:`to_stdout` with the quiet check switched on;
    `skip_eol` is passed through unchanged.
    """
    result = self.to_stdout(message, skip_eol, check_quiet=True)
    return result
@property
def user_logger(self) -> logging.Logger | None:
return cast(logging.Logger | None, self.params.get('logger'))
def _write_string(self, s: str, out: io.TextIOWrapper | None = None) -> None:
write_string(s, out=out, encoding=self.params.get('encoding')) write_string(s, out=out, encoding=self.params.get('encoding'))
def to_stdout(self, message, skip_eol=False, check_quiet=False): def to_stdout(self, message, skip_eol: bool = False, check_quiet: bool = False):
"""Print message to stdout if not in quiet mode.""" """Print message to stdout if not in quiet mode."""
if self.params.get('logger'): quiet = check_quiet and self.params.get('quiet', False)
self.params['logger'].debug(message)
elif not check_quiet or not self.params.get('quiet', False):
terminator = ['\n', ''][skip_eol]
output = message + terminator
self._write_string(output, self._screen_file) debug: bool
if message.startswith(f'[debug]'):
def to_stderr(self, message): debug = True
"""Print message to stderr.""" message = message.removeprefix('[debug]').lstrip()
assert isinstance(message, compat_str) elif message.startswith('[info]'):
if self.params.get('logger'): debug = False
self.params['logger'].error(message) message = message.removeprefix('[info]').lstrip()
elif quiet:
debug = True
else: else:
output = message + '\n' debug = False
self._write_string(output, self._err_file)
_logger = logger
if m := TAGGED_LOG_MSG_REGEX.match(message):
tag = m.group('tag')
subtag = m.group('subtag')
_logger_name = f'youtube_dl.{tag}'
if m.group('subtag'):
_logger_name += f'.{subtag}'
_logger = logging.getLogger(_logger_name)
message = m.group('msg')
if debug:
_logger.debug(message)
else:
_logger.info(message)
def to_stderr(self, message: str) -> None:
if self.user_logger is not None:
self.user_logger.error(message)
else:
logger.error(message)
def to_screen(self, message, skip_eol=False): def to_screen(self, message, skip_eol=False):
"""Print message to stdout if not in quiet mode.""" """Print message to stdout if not in quiet mode."""
@ -558,11 +604,8 @@ class YoutubeDL(object):
raise DownloadError(message, exc_info) raise DownloadError(message, exc_info)
self._download_retcode = 1 self._download_retcode = 1
def report_warning(self, message, only_once=False, _cache={}): def report_warning(self, message: str, only_once: bool = False, _cache: dict[int, int] | None = None) -> None:
''' _cache = _cache or {}
Print the message to stderr, it will be prefixed with 'WARNING:'
If stderr is a tty file the 'WARNING:' will be colored
'''
if only_once: if only_once:
m_hash = hash((self, message)) m_hash = hash((self, message))
m_cnt = _cache.setdefault(m_hash, 0) m_cnt = _cache.setdefault(m_hash, 0)
@ -570,68 +613,28 @@ class YoutubeDL(object):
if m_cnt > 0: if m_cnt > 0:
return return
if self.params.get('logger') is not None: if self.user_logger is not None:
self.params['logger'].warning(message) self.user_logger.warning(message)
else: else:
if self.params.get('no_warnings'): if self.params.get('no_warnings'):
return return
if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt': logger.warning(message)
_msg_header = '\033[0;33mWARNING:\033[0m'
else:
_msg_header = 'WARNING:'
warning_message = '%s %s' % (_msg_header, message)
self.to_stderr(warning_message)
def report_error(self, message, *args, **kwargs): # TODO: re-implement :meth:`trouble` to output tracebacks with RichHandler
''' def report_error(self, message: str, *args: Any, **kwargs: Any) -> None:
Do the same as trouble, but prefixes the message with 'ERROR:', colored logger.error(message)
in red if stderr is a tty file. kwargs['message'] = f'ERROR: {message}'
'''
if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
_msg_header = '\033[0;31mERROR:\033[0m'
else:
_msg_header = 'ERROR:'
kwargs['message'] = '%s %s' % (_msg_header, message)
self.trouble(*args, **kwargs) self.trouble(*args, **kwargs)
def to_console_title(self, message): def write_debug(self, message, only_once=False):
if not self.params.get('consoletitle', False): '''Log debug message or Print message to stderr'''
if not self.params.get('verbose', False):
return return
if compat_os_name == 'nt': message = '[debug] {0}'.format(message)
if ctypes.windll.kernel32.GetConsoleWindow(): if self.params.get('logger'):
# c_wchar_p() might not be necessary if `message` is self.params['logger'].debug(message)
# already of type unicode() else:
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message)) self.to_stderr(message, only_once)
elif 'TERM' in os.environ:
self._write_string('\033]0;%s\007' % message, self._screen_file)
def save_console_title(self):
if not self.params.get('consoletitle', False):
return
if self.params.get('simulate', False):
return
if compat_os_name != 'nt' and 'TERM' in os.environ:
# Save the title on stack
self._write_string('\033[22;0t', self._screen_file)
def restore_console_title(self):
if not self.params.get('consoletitle', False):
return
if self.params.get('simulate', False):
return
if compat_os_name != 'nt' and 'TERM' in os.environ:
# Restore the title from stack
self._write_string('\033[23;0t', self._screen_file)
def __enter__(self):
self.save_console_title()
return self
def __exit__(self, *args):
self.restore_console_title()
if self.params.get('cookiefile') is not None:
self.cookiejar.save(ignore_discard=True, ignore_expires=True)
def report_unscoped_cookies(self, *args, **kwargs): def report_unscoped_cookies(self, *args, **kwargs):
# message=None, tb=False, is_error=False # message=None, tb=False, is_error=False
@ -2470,7 +2473,7 @@ class YoutubeDL(object):
self.get_encoding())) self.get_encoding()))
write_string(encoding_str, encoding=None) write_string(encoding_str, encoding=None)
writeln_debug = lambda *s: self._write_string('[debug] %s\n' % (''.join(s), )) writeln_debug = lambda *s: self.write_debug(''.join(s))
writeln_debug('youtube-dl version ', __version__) writeln_debug('youtube-dl version ', __version__)
if _LAZY_LOADER: if _LAZY_LOADER:
writeln_debug('Lazy loading extractors enabled') writeln_debug('Lazy loading extractors enabled')
@ -2612,7 +2615,13 @@ class YoutubeDL(object):
encoding = preferredencoding() encoding = preferredencoding()
return encoding return encoding
def _write_info_json(self, label, info_dict, infofn, overwrite=None): def _write_info_json(
self,
label: str,
info_dict: dict[str, Any],
infofn: str,
overwrite: bool | None = None,
) -> bool | str | None:
if not self.params.get('writeinfojson', False): if not self.params.get('writeinfojson', False):
return False return False
@ -2632,7 +2641,7 @@ class YoutubeDL(object):
return True return True
except (OSError, IOError): except (OSError, IOError):
self.report_error(msg('Cannot write %s to JSON file ', label) + infofn) self.report_error(msg('Cannot write %s to JSON file ', label) + infofn)
return return None
def _write_thumbnails(self, info_dict, filename): def _write_thumbnails(self, info_dict, filename):
if self.params.get('writethumbnail', False): if self.params.get('writethumbnail', False):

View file

@ -18,7 +18,7 @@ from .compat import (
compat_getpass, compat_getpass,
compat_register_utf8, compat_register_utf8,
compat_shlex_split, compat_shlex_split,
workaround_optparse_bug9161, _workaround_optparse_bug9161,
) )
from .utils import ( from .utils import (
_UnsafeExtensionError, _UnsafeExtensionError,
@ -50,7 +50,7 @@ def _real_main(argv=None):
# Compatibility fix for Windows # Compatibility fix for Windows
compat_register_utf8() compat_register_utf8()
workaround_optparse_bug9161() _workaround_optparse_bug9161()
setproctitle('youtube-dl') setproctitle('youtube-dl')
@ -287,6 +287,10 @@ def _real_main(argv=None):
postprocessors.append({ postprocessors.append({
'key': 'FFmpegEmbedSubtitle', 'key': 'FFmpegEmbedSubtitle',
}) })
if opts.aacToMp3:
postprocessors.append({
'key': 'ConvertAACToMP3PP',
})
if opts.embedthumbnail: if opts.embedthumbnail:
already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
postprocessors.append({ postprocessors.append({

View file

@ -1,3 +1,4 @@
# coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import errno import errno
@ -10,12 +11,14 @@ import traceback
from .compat import ( from .compat import (
compat_getenv, compat_getenv,
compat_open as open, compat_open as open,
compat_os_makedirs,
) )
from .utils import ( from .utils import (
error_to_compat_str, error_to_compat_str,
escape_rfc3986,
expand_path, expand_path,
is_outdated_version, is_outdated_version,
try_get, traverse_obj,
write_json_file, write_json_file,
) )
from .version import __version__ from .version import __version__
@ -30,23 +33,35 @@ class Cache(object):
def __init__(self, ydl): def __init__(self, ydl):
self._ydl = ydl self._ydl = ydl
def _write_debug(self, *args, **kwargs):
self._ydl.write_debug(*args, **kwargs)
def _report_warning(self, *args, **kwargs):
self._ydl.report_warning(*args, **kwargs)
def _to_screen(self, *args, **kwargs):
self._ydl.to_screen(*args, **kwargs)
def _get_param(self, k, default=None):
return self._ydl.params.get(k, default)
def _get_root_dir(self): def _get_root_dir(self):
res = self._ydl.params.get('cachedir') res = self._get_param('cachedir')
if res is None: if res is None:
cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache') cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache')
res = os.path.join(cache_root, self._YTDL_DIR) res = os.path.join(cache_root, self._YTDL_DIR)
return expand_path(res) return expand_path(res)
def _get_cache_fn(self, section, key, dtype): def _get_cache_fn(self, section, key, dtype):
assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \ assert re.match(r'^[\w.-]+$', section), \
'invalid section %r' % section 'invalid section %r' % section
assert re.match(r'^[a-zA-Z0-9_.-]+$', key), 'invalid key %r' % key key = escape_rfc3986(key, safe='').replace('%', ',') # encode non-ascii characters
return os.path.join( return os.path.join(
self._get_root_dir(), section, '%s.%s' % (key, dtype)) self._get_root_dir(), section, '%s.%s' % (key, dtype))
@property @property
def enabled(self): def enabled(self):
return self._ydl.params.get('cachedir') is not False return self._get_param('cachedir') is not False
def store(self, section, key, data, dtype='json'): def store(self, section, key, data, dtype='json'):
assert dtype in ('json',) assert dtype in ('json',)
@ -56,61 +71,75 @@ class Cache(object):
fn = self._get_cache_fn(section, key, dtype) fn = self._get_cache_fn(section, key, dtype)
try: try:
try: compat_os_makedirs(os.path.dirname(fn), exist_ok=True)
os.makedirs(os.path.dirname(fn)) self._write_debug('Saving {section}.{key} to cache'.format(section=section, key=key))
except OSError as ose:
if ose.errno != errno.EEXIST:
raise
write_json_file({self._VERSION_KEY: __version__, 'data': data}, fn) write_json_file({self._VERSION_KEY: __version__, 'data': data}, fn)
except Exception: except Exception:
tb = traceback.format_exc() tb = traceback.format_exc()
self._ydl.report_warning( self._report_warning('Writing cache to {fn!r} failed: {tb}'.format(fn=fn, tb=tb))
'Writing cache to %r failed: %s' % (fn, tb))
def clear(self, section, key, dtype='json'):
    """Delete the cache entry for `section`/`key`, if the cache is enabled.

    A missing cache file is not an error. Any other failure to remove the
    file is reported as a warning (including the traceback) rather than
    being raised to the caller.
    """
    if not self.enabled:
        return

    fn = self._get_cache_fn(section, key, dtype)
    self._write_debug('Clearing {section}.{key} from cache'.format(section=section, key=key))
    try:
        os.remove(fn)
    except Exception as e:
        # Not every exception carries `errno` (eg, TypeError, ValueError):
        # supply a default so that this handler cannot itself raise
        # AttributeError and hide the original failure
        if getattr(e, 'errno', None) == errno.ENOENT:
            # file not found
            return
        tb = traceback.format_exc()
        self._report_warning('Clearing cache from {fn!r} failed: {tb}'.format(fn=fn, tb=tb))
def _validate(self, data, min_ver): def _validate(self, data, min_ver):
version = try_get(data, lambda x: x[self._VERSION_KEY]) version = traverse_obj(data, self._VERSION_KEY)
if not version: # Backward compatibility if not version: # Backward compatibility
data, version = {'data': data}, self._DEFAULT_VERSION data, version = {'data': data}, self._DEFAULT_VERSION
if not is_outdated_version(version, min_ver or '0', assume_new=False): if not is_outdated_version(version, min_ver or '0', assume_new=False):
return data['data'] return data['data']
self._ydl.to_screen( self._write_debug('Discarding old cache from version {version} (needs {min_ver})'.format(version=version, min_ver=min_ver))
'Discarding old cache from version {version} (needs {min_ver})'.format(**locals()))
def load(self, section, key, dtype='json', default=None, min_ver=None): def load(self, section, key, dtype='json', default=None, **kw_min_ver):
assert dtype in ('json',) assert dtype in ('json',)
min_ver = kw_min_ver.get('min_ver')
if not self.enabled: if not self.enabled:
return default return default
cache_fn = self._get_cache_fn(section, key, dtype) cache_fn = self._get_cache_fn(section, key, dtype)
try: try:
with open(cache_fn, encoding='utf-8') as cachef:
self._write_debug('Loading {section}.{key} from cache'.format(section=section, key=key), only_once=True)
return self._validate(json.load(cachef), min_ver)
except (ValueError, KeyError):
try: try:
with open(cache_fn, 'r', encoding='utf-8') as cachef: file_size = 'size: %d' % os.path.getsize(cache_fn)
return self._validate(json.load(cachef), min_ver) except (OSError, IOError) as oe:
except ValueError: file_size = error_to_compat_str(oe)
try: self._report_warning('Cache retrieval from %s failed (%s)' % (cache_fn, file_size))
file_size = os.path.getsize(cache_fn) except Exception as e:
except (OSError, IOError) as oe: if getattr(e, 'errno') == errno.ENOENT:
file_size = error_to_compat_str(oe) # no cache available
self._ydl.report_warning( return
'Cache retrieval from %s failed (%s)' % (cache_fn, file_size)) self._report_warning('Cache retrieval from %s failed' % (cache_fn,))
except IOError:
pass # No cache available
return default return default
def remove(self): def remove(self):
if not self.enabled: if not self.enabled:
self._ydl.to_screen('Cache is disabled (Did you combine --no-cache-dir and --rm-cache-dir?)') self._to_screen('Cache is disabled (Did you combine --no-cache-dir and --rm-cache-dir?)')
return return
cachedir = self._get_root_dir() cachedir = self._get_root_dir()
if not any((term in cachedir) for term in ('cache', 'tmp')): if not any((term in cachedir) for term in ('cache', 'tmp')):
raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir) raise Exception('Not removing directory %s - this does not look like a cache dir' % (cachedir,))
self._ydl.to_screen( self._to_screen(
'Removing cache dir %s .' % cachedir, skip_eol=True) 'Removing cache dir %s .' % (cachedir,), skip_eol=True, ),
if os.path.exists(cachedir): if os.path.exists(cachedir):
self._ydl.to_screen('.', skip_eol=True) self._to_screen('.', skip_eol=True)
shutil.rmtree(cachedir) shutil.rmtree(cachedir)
self._ydl.to_screen('.') self._to_screen('.')

View file

@ -16,7 +16,6 @@ import os
import platform import platform
import re import re
import shlex import shlex
import shutil
import socket import socket
import struct import struct
import subprocess import subprocess
@ -24,11 +23,15 @@ import sys
import types import types
import xml.etree.ElementTree import xml.etree.ElementTree
_IDENTITY = lambda x: x
# naming convention # naming convention
# 'compat_' + Python3_name.replace('.', '_') # 'compat_' + Python3_name.replace('.', '_')
# other aliases exist for convenience and/or legacy # other aliases exist for convenience and/or legacy
# wrap disposable test values in type() to reclaim storage
# deal with critical unicode/str things first # deal with critical unicode/str things first:
# compat_str, compat_basestring, compat_chr
try: try:
# Python 2 # Python 2
compat_str, compat_basestring, compat_chr = ( compat_str, compat_basestring, compat_chr = (
@ -39,18 +42,23 @@ except NameError:
str, (str, bytes), chr str, (str, bytes), chr
) )
# casefold
# compat_casefold
try: try:
compat_str.casefold compat_str.casefold
compat_casefold = lambda s: s.casefold() compat_casefold = lambda s: s.casefold()
except AttributeError: except AttributeError:
from .casefold import _casefold as compat_casefold from .casefold import _casefold as compat_casefold
# compat_collections_abc
try: try:
import collections.abc as compat_collections_abc import collections.abc as compat_collections_abc
except ImportError: except ImportError:
import collections as compat_collections_abc import collections as compat_collections_abc
# compat_urllib_request
try: try:
import urllib.request as compat_urllib_request import urllib.request as compat_urllib_request
except ImportError: # Python 2 except ImportError: # Python 2
@ -79,11 +87,15 @@ except TypeError:
_add_init_method_arg(compat_urllib_request.Request) _add_init_method_arg(compat_urllib_request.Request)
del _add_init_method_arg del _add_init_method_arg
# compat_urllib_error
try: try:
import urllib.error as compat_urllib_error import urllib.error as compat_urllib_error
except ImportError: # Python 2 except ImportError: # Python 2
import urllib2 as compat_urllib_error import urllib2 as compat_urllib_error
# compat_urllib_parse
try: try:
import urllib.parse as compat_urllib_parse import urllib.parse as compat_urllib_parse
except ImportError: # Python 2 except ImportError: # Python 2
@ -98,17 +110,23 @@ except ImportError: # Python 2
compat_urlparse = compat_urllib_parse compat_urlparse = compat_urllib_parse
compat_urllib_parse_urlparse = compat_urllib_parse.urlparse compat_urllib_parse_urlparse = compat_urllib_parse.urlparse
# compat_urllib_response
try: try:
import urllib.response as compat_urllib_response import urllib.response as compat_urllib_response
except ImportError: # Python 2 except ImportError: # Python 2
import urllib as compat_urllib_response import urllib as compat_urllib_response
# compat_urllib_response.addinfourl
try: try:
compat_urllib_response.addinfourl.status compat_urllib_response.addinfourl.status
except AttributeError: except AttributeError:
# .getcode() is deprecated in Py 3. # .getcode() is deprecated in Py 3.
compat_urllib_response.addinfourl.status = property(lambda self: self.getcode()) compat_urllib_response.addinfourl.status = property(lambda self: self.getcode())
# compat_http_cookiejar
try: try:
import http.cookiejar as compat_cookiejar import http.cookiejar as compat_cookiejar
except ImportError: # Python 2 except ImportError: # Python 2
@ -127,12 +145,16 @@ else:
compat_cookiejar_Cookie = compat_cookiejar.Cookie compat_cookiejar_Cookie = compat_cookiejar.Cookie
compat_http_cookiejar_Cookie = compat_cookiejar_Cookie compat_http_cookiejar_Cookie = compat_cookiejar_Cookie
# compat_http_cookies
try: try:
import http.cookies as compat_cookies import http.cookies as compat_cookies
except ImportError: # Python 2 except ImportError: # Python 2
import Cookie as compat_cookies import Cookie as compat_cookies
compat_http_cookies = compat_cookies compat_http_cookies = compat_cookies
# compat_http_cookies_SimpleCookie
if sys.version_info[0] == 2 or sys.version_info < (3, 3): if sys.version_info[0] == 2 or sys.version_info < (3, 3):
class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie): class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
def load(self, rawdata): def load(self, rawdata):
@ -155,11 +177,15 @@ else:
compat_cookies_SimpleCookie = compat_cookies.SimpleCookie compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
compat_http_cookies_SimpleCookie = compat_cookies_SimpleCookie compat_http_cookies_SimpleCookie = compat_cookies_SimpleCookie
# compat_html_entities, probably useless now
try: try:
import html.entities as compat_html_entities import html.entities as compat_html_entities
except ImportError: # Python 2 except ImportError: # Python 2
import htmlentitydefs as compat_html_entities import htmlentitydefs as compat_html_entities
# compat_html_entities_html5
try: # Python >= 3.3 try: # Python >= 3.3
compat_html_entities_html5 = compat_html_entities.html5 compat_html_entities_html5 = compat_html_entities.html5
except AttributeError: except AttributeError:
@ -2408,18 +2434,24 @@ except AttributeError:
# Py < 3.1 # Py < 3.1
compat_http_client.HTTPResponse.getcode = lambda self: self.status compat_http_client.HTTPResponse.getcode = lambda self: self.status
# compat_urllib_HTTPError
try: try:
from urllib.error import HTTPError as compat_HTTPError from urllib.error import HTTPError as compat_HTTPError
except ImportError: # Python 2 except ImportError: # Python 2
from urllib2 import HTTPError as compat_HTTPError from urllib2 import HTTPError as compat_HTTPError
compat_urllib_HTTPError = compat_HTTPError compat_urllib_HTTPError = compat_HTTPError
# compat_urllib_request_urlretrieve
try: try:
from urllib.request import urlretrieve as compat_urlretrieve from urllib.request import urlretrieve as compat_urlretrieve
except ImportError: # Python 2 except ImportError: # Python 2
from urllib import urlretrieve as compat_urlretrieve from urllib import urlretrieve as compat_urlretrieve
compat_urllib_request_urlretrieve = compat_urlretrieve compat_urllib_request_urlretrieve = compat_urlretrieve
# compat_html_parser_HTMLParser, compat_html_parser_HTMLParseError
try: try:
from HTMLParser import ( from HTMLParser import (
HTMLParser as compat_HTMLParser, HTMLParser as compat_HTMLParser,
@ -2432,22 +2464,33 @@ except ImportError: # Python 3
# HTMLParseError was deprecated in Python 3.3 and removed in # HTMLParseError was deprecated in Python 3.3 and removed in
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible # Python 3.5. Introducing dummy exception for Python >3.5 for compatible
# and uniform cross-version exception handling # and uniform cross-version exception handling
class compat_HTMLParseError(Exception): class compat_HTMLParseError(Exception):
pass pass
compat_html_parser_HTMLParser = compat_HTMLParser compat_html_parser_HTMLParser = compat_HTMLParser
compat_html_parser_HTMLParseError = compat_HTMLParseError compat_html_parser_HTMLParseError = compat_HTMLParseError
# compat_subprocess_get_DEVNULL
try: try:
_DEVNULL = subprocess.DEVNULL _DEVNULL = subprocess.DEVNULL
compat_subprocess_get_DEVNULL = lambda: _DEVNULL compat_subprocess_get_DEVNULL = lambda: _DEVNULL
except AttributeError: except AttributeError:
compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w') compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
# compat_http_server
try: try:
import http.server as compat_http_server import http.server as compat_http_server
except ImportError: except ImportError:
import BaseHTTPServer as compat_http_server import BaseHTTPServer as compat_http_server
# compat_urllib_parse_unquote_to_bytes,
# compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus,
# compat_urllib_parse_urlencode,
# compat_urllib_parse_parse_qs
try: try:
from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
from urllib.parse import unquote as compat_urllib_parse_unquote from urllib.parse import unquote as compat_urllib_parse_unquote
@ -2455,8 +2498,7 @@ try:
from urllib.parse import urlencode as compat_urllib_parse_urlencode from urllib.parse import urlencode as compat_urllib_parse_urlencode
from urllib.parse import parse_qs as compat_parse_qs from urllib.parse import parse_qs as compat_parse_qs
except ImportError: # Python 2 except ImportError: # Python 2
_asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire') _asciire = getattr(compat_urllib_parse, '_asciire', None) or re.compile(r'([\x00-\x7f]+)')
else re.compile(r'([\x00-\x7f]+)'))
# HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
# implementations from cpython 3.4.3's stdlib. Python 2's version # implementations from cpython 3.4.3's stdlib. Python 2's version
@ -2524,24 +2566,21 @@ except ImportError: # Python 2
# Possible solutions are to either port it from python 3 with all # Possible solutions are to either port it from python 3 with all
# the friends or manually ensure input query contains only byte strings. # the friends or manually ensure input query contains only byte strings.
# We will stick with latter thus recursively encoding the whole query. # We will stick with latter thus recursively encoding the whole query.
def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'): def compat_urllib_parse_urlencode(query, doseq=0, safe='', encoding='utf-8', errors='strict'):
def encode_elem(e): def encode_elem(e):
if isinstance(e, dict): if isinstance(e, dict):
e = encode_dict(e) e = encode_dict(e)
elif isinstance(e, (list, tuple,)): elif isinstance(e, (list, tuple,)):
list_e = encode_list(e) e = type(e)(encode_elem(el) for el in e)
e = tuple(list_e) if isinstance(e, tuple) else list_e
elif isinstance(e, compat_str): elif isinstance(e, compat_str):
e = e.encode(encoding) e = e.encode(encoding, errors)
return e return e
def encode_dict(d): def encode_dict(d):
return dict((encode_elem(k), encode_elem(v)) for k, v in d.items()) return tuple((encode_elem(k), encode_elem(v)) for k, v in d.items())
def encode_list(l): return compat_urllib_parse._urlencode(encode_elem(query), doseq=doseq).decode('ascii')
return [encode_elem(e) for e in l]
return compat_urllib_parse._urlencode(encode_elem(query), doseq=doseq)
# HACK: The following is the correct parse_qs implementation from cpython 3's stdlib. # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
# Python 2's version is apparently totally broken # Python 2's version is apparently totally broken
@ -2596,8 +2635,61 @@ except ImportError: # Python 2
('parse_qs', compat_parse_qs)): ('parse_qs', compat_parse_qs)):
setattr(compat_urllib_parse, name, fix) setattr(compat_urllib_parse, name, fix)
# Probe whether every chr(i) can be tested for membership in a byte string;
# the replacements below are only installed when that raises TypeError
try:
all(chr(i) in b'' for i in range(256))
except TypeError:
# not all chr(i) are str: patch Python2 quote
# reuse the module's own `_safemaps` cache if it has one, else start with an empty dict
_safemaps = getattr(compat_urllib_parse, '_safemaps', {})
_always_safe = frozenset(compat_urllib_parse.always_safe)
# Replacement for compat_urllib_parse.quote: percent-escapes the characters of
# `s` that are not in `safe` or in the module's always-safe set
def _quote(s, safe='/'):
"""quote('abc def') -> 'abc%20def'"""
if not s and s is not None: # fast path
return s
safe = frozenset(safe)
# the per-`safe` translation tables are cached under this key
cachekey = (safe, _always_safe)
try:
safe_map = _safemaps[cachekey]
except KeyError:
safe = _always_safe | safe
safe_map = {}
# map each of the 256 chr() values to itself if it is ASCII and safe,
# otherwise to its '%XX' escape
for i in range(256):
c = chr(i)
safe_map[c] = (
c if (i < 128 and c in safe)
else b'%{0:02X}'.format(i))
_safemaps[cachekey] = safe_map
# nothing to escape
if safe.issuperset(s):
return s
return ''.join(safe_map[c] for c in s)
# linked code
# Replacement for quote_plus: like _quote, but a space becomes '+'
# (the space is kept safe for _quote and then replaced)
def _quote_plus(s, safe=''):
return (
_quote(s, safe + b' ').replace(b' ', b'+') if b' ' in s
else _quote(s, safe))
# linked code
# Replacement for urlcleanup: cleans up the module's opener (if any) and
# empties both the safe-map cache used by _quote and the module's ftpcache
def _urlcleanup():
if compat_urllib_parse._urlopener:
compat_urllib_parse._urlopener.cleanup()
_safemaps.clear()
compat_urllib_parse.ftpcache.clear()
# keep each original function available as '_' + name, then install the replacement
for name, fix in (
('quote', _quote),
('quote_plus', _quote_plus),
('urlcleanup', _urlcleanup)):
setattr(compat_urllib_parse, '_' + name, getattr(compat_urllib_parse, name))
setattr(compat_urllib_parse, name, fix)
compat_urllib_parse_parse_qs = compat_parse_qs compat_urllib_parse_parse_qs = compat_parse_qs
# compat_urllib_request_DataHandler
try: try:
from urllib.request import DataHandler as compat_urllib_request_DataHandler from urllib.request import DataHandler as compat_urllib_request_DataHandler
except ImportError: # Python < 3.4 except ImportError: # Python < 3.4
@ -2632,16 +2724,20 @@ except ImportError: # Python < 3.4
return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url) return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
# compat_xml_etree_ElementTree_ParseError
try: try:
from xml.etree.ElementTree import ParseError as compat_xml_parse_error from xml.etree.ElementTree import ParseError as compat_xml_parse_error
except ImportError: # Python 2.6 except ImportError: # Python 2.6
from xml.parsers.expat import ExpatError as compat_xml_parse_error from xml.parsers.expat import ExpatError as compat_xml_parse_error
compat_xml_etree_ElementTree_ParseError = compat_xml_parse_error compat_xml_etree_ElementTree_ParseError = compat_xml_parse_error
etree = xml.etree.ElementTree
# compat_xml_etree_ElementTree_Element
_etree = xml.etree.ElementTree
class _TreeBuilder(etree.TreeBuilder): class _TreeBuilder(_etree.TreeBuilder):
def doctype(self, name, pubid, system): def doctype(self, name, pubid, system):
pass pass
@ -2650,7 +2746,7 @@ try:
# xml.etree.ElementTree.Element is a method in Python <=2.6 and # xml.etree.ElementTree.Element is a method in Python <=2.6 and
# the following will crash with: # the following will crash with:
# TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types # TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
isinstance(None, etree.Element) isinstance(None, _etree.Element)
from xml.etree.ElementTree import Element as compat_etree_Element from xml.etree.ElementTree import Element as compat_etree_Element
except TypeError: # Python <=2.6 except TypeError: # Python <=2.6
from xml.etree.ElementTree import _ElementInterface as compat_etree_Element from xml.etree.ElementTree import _ElementInterface as compat_etree_Element
@ -2658,12 +2754,12 @@ compat_xml_etree_ElementTree_Element = compat_etree_Element
if sys.version_info[0] >= 3: if sys.version_info[0] >= 3:
def compat_etree_fromstring(text): def compat_etree_fromstring(text):
return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) return _etree.XML(text, parser=_etree.XMLParser(target=_TreeBuilder()))
else: else:
# python 2.x tries to encode unicode strings with ascii (see the # python 2.x tries to encode unicode strings with ascii (see the
# XMLParser._fixtext method) # XMLParser._fixtext method)
try: try:
_etree_iter = etree.Element.iter _etree_iter = _etree.Element.iter
except AttributeError: # Python <=2.6 except AttributeError: # Python <=2.6
def _etree_iter(root): def _etree_iter(root):
for el in root.findall('*'): for el in root.findall('*'):
@ -2675,27 +2771,29 @@ else:
# 2.7 source # 2.7 source
def _XML(text, parser=None): def _XML(text, parser=None):
if not parser: if not parser:
parser = etree.XMLParser(target=_TreeBuilder()) parser = _etree.XMLParser(target=_TreeBuilder())
parser.feed(text) parser.feed(text)
return parser.close() return parser.close()
def _element_factory(*args, **kwargs): def _element_factory(*args, **kwargs):
el = etree.Element(*args, **kwargs) el = _etree.Element(*args, **kwargs)
for k, v in el.items(): for k, v in el.items():
if isinstance(v, bytes): if isinstance(v, bytes):
el.set(k, v.decode('utf-8')) el.set(k, v.decode('utf-8'))
return el return el
def compat_etree_fromstring(text):
    """Parse `text` into an element tree, decoding bytes text as UTF-8.

    Elements are created through _element_factory; afterwards the `text`
    of every element yielded by _etree_iter is decoded if it is bytes.
    """
    builder = _TreeBuilder(element_factory=_element_factory)
    doc = _XML(text, parser=_etree.XMLParser(target=builder))
    for node in _etree_iter(doc):
        node_text = node.text
        # None is never an instance of bytes, so one check covers both cases
        if isinstance(node_text, bytes):
            node.text = node_text.decode('utf-8')
    return doc
if hasattr(etree, 'register_namespace'):
compat_etree_register_namespace = etree.register_namespace # compat_xml_etree_register_namespace
else: try:
compat_etree_register_namespace = _etree.register_namespace
except AttributeError:
def compat_etree_register_namespace(prefix, uri): def compat_etree_register_namespace(prefix, uri):
"""Register a namespace prefix. """Register a namespace prefix.
The registry is global, and any existing mapping for either the The registry is global, and any existing mapping for either the
@ -2704,14 +2802,16 @@ else:
attributes in this namespace will be serialized with prefix if possible. attributes in this namespace will be serialized with prefix if possible.
ValueError is raised if prefix is reserved or is invalid. ValueError is raised if prefix is reserved or is invalid.
""" """
if re.match(r"ns\d+$", prefix): if re.match(r'ns\d+$', prefix):
raise ValueError("Prefix format reserved for internal use") raise ValueError('Prefix format reserved for internal use')
for k, v in list(etree._namespace_map.items()): for k, v in list(_etree._namespace_map.items()):
if k == uri or v == prefix: if k == uri or v == prefix:
del etree._namespace_map[k] del _etree._namespace_map[k]
etree._namespace_map[uri] = prefix _etree._namespace_map[uri] = prefix
compat_xml_etree_register_namespace = compat_etree_register_namespace compat_xml_etree_register_namespace = compat_etree_register_namespace
# compat_xpath, compat_etree_iterfind
if sys.version_info < (2, 7): if sys.version_info < (2, 7):
# Here comes the crazy part: In 2.6, if the xpath is a unicode, # Here comes the crazy part: In 2.6, if the xpath is a unicode,
# .//node does not match if a node is a direct child of . ! # .//node does not match if a node is a direct child of . !
@ -2898,7 +2998,6 @@ if sys.version_info < (2, 7):
def __init__(self, root): def __init__(self, root):
self.root = root self.root = root
##
# Generate all matching objects. # Generate all matching objects.
def compat_etree_iterfind(elem, path, namespaces=None): def compat_etree_iterfind(elem, path, namespaces=None):
@ -2933,13 +3032,15 @@ if sys.version_info < (2, 7):
else: else:
compat_xpath = lambda xpath: xpath
compat_etree_iterfind = lambda element, match: element.iterfind(match) compat_etree_iterfind = lambda element, match: element.iterfind(match)
compat_xpath = _IDENTITY
# compat_os_name
compat_os_name = os._name if os.name == 'java' else os.name compat_os_name = os._name if os.name == 'java' else os.name
# compat_shlex_quote
if compat_os_name == 'nt': if compat_os_name == 'nt':
def compat_shlex_quote(s):
    """Return `s` as is if it only has word chars, '-', '.', '/'; else double-quote it.

    When quoting, embedded double quotes are escaped with a backslash.
    """
    if re.match(r'^[-_\w./]+$', s):
        return s
    escaped = s.replace('"', '\\"')
    return '"%s"' % escaped
@ -2954,6 +3055,7 @@ else:
return "'" + s.replace("'", "'\"'\"'") + "'" return "'" + s.replace("'", "'\"'\"'") + "'"
# compat_shlex.split
try: try:
args = shlex.split('中文') args = shlex.split('中文')
assert (isinstance(args, list) assert (isinstance(args, list)
@ -2969,6 +3071,7 @@ except (AssertionError, UnicodeEncodeError):
return list(map(lambda s: s.decode('utf-8'), shlex.split(s, comments, posix))) return list(map(lambda s: s.decode('utf-8'), shlex.split(s, comments, posix)))
# compat_ord
def compat_ord(c): def compat_ord(c):
if isinstance(c, int): if isinstance(c, int):
return c return c
@ -2976,6 +3079,7 @@ def compat_ord(c):
return ord(c) return ord(c)
# compat_getenv, compat_os_path_expanduser, compat_setenv
if sys.version_info >= (3, 0): if sys.version_info >= (3, 0):
compat_getenv = os.getenv compat_getenv = os.getenv
compat_expanduser = os.path.expanduser compat_expanduser = os.path.expanduser
@ -3063,6 +3167,22 @@ else:
compat_os_path_expanduser = compat_expanduser compat_os_path_expanduser = compat_expanduser
# compat_os_makedirs
try:
os.makedirs('.', exist_ok=True)
compat_os_makedirs = os.makedirs
except TypeError: # < Py3.2
from errno import EEXIST as _errno_EEXIST
def compat_os_makedirs(name, mode=0o777, exist_ok=False):
    """Create directory `name` and any missing parents, like os.makedirs().

    Fallback for interpreters whose os.makedirs() has no `exist_ok`.

    name:     path of the leaf directory to create
    mode:     permission bits passed through to os.makedirs()
    exist_ok: if true, an already existing *directory* is not an error

    Raises OSError as os.makedirs() does. With `exist_ok` set, EEXIST is
    still raised when `name` exists but is not a directory.
    """
    try:
        return os.makedirs(name, mode=mode)
    except OSError as ose:
        # Only an existing directory may be tolerated: an EEXIST caused by
        # a regular file at `name` must still surface to the caller
        already_there = (
            exist_ok
            and ose.errno == _errno_EEXIST
            and os.path.isdir(name))
        if not already_there:
            raise
# compat_os_path_realpath
if compat_os_name == 'nt' and sys.version_info < (3, 8): if compat_os_name == 'nt' and sys.version_info < (3, 8):
# os.path.realpath on Windows does not follow symbolic links # os.path.realpath on Windows does not follow symbolic links
# prior to Python 3.8 (see https://bugs.python.org/issue9949) # prior to Python 3.8 (see https://bugs.python.org/issue9949)
@ -3076,6 +3196,7 @@ else:
compat_os_path_realpath = compat_realpath compat_os_path_realpath = compat_realpath
# compat_print
if sys.version_info < (3, 0): if sys.version_info < (3, 0):
def compat_print(s): def compat_print(s):
from .utils import preferredencoding from .utils import preferredencoding
@ -3086,6 +3207,7 @@ else:
print(s) print(s)
# compat_getpass_getpass
if sys.version_info < (3, 0) and sys.platform == 'win32': if sys.version_info < (3, 0) and sys.platform == 'win32':
def compat_getpass(prompt, *args, **kwargs): def compat_getpass(prompt, *args, **kwargs):
if isinstance(prompt, compat_str): if isinstance(prompt, compat_str):
@ -3098,22 +3220,22 @@ else:
compat_getpass_getpass = compat_getpass compat_getpass_getpass = compat_getpass
# compat_input
try: try:
compat_input = raw_input compat_input = raw_input
except NameError: # Python 3 except NameError: # Python 3
compat_input = input compat_input = input
# compat_kwargs
# Python < 2.6.5 require kwargs to be bytes # Python < 2.6.5 require kwargs to be bytes
try: try:
def _testfunc(x): (lambda x: x)(**{'x': 0})
pass
_testfunc(**{'x': 0})
except TypeError: except TypeError:
def compat_kwargs(kwargs): def compat_kwargs(kwargs):
return dict((bytes(k), v) for k, v in kwargs.items()) return dict((bytes(k), v) for k, v in kwargs.items())
else: else:
compat_kwargs = lambda kwargs: kwargs compat_kwargs = _IDENTITY
# compat_numeric_types # compat_numeric_types
@ -3132,6 +3254,8 @@ except NameError: # Python 3
# compat_int # compat_int
compat_int = compat_integer_types[-1] compat_int = compat_integer_types[-1]
# compat_socket_create_connection
if sys.version_info < (2, 7): if sys.version_info < (2, 7):
def compat_socket_create_connection(address, timeout, source_address=None): def compat_socket_create_connection(address, timeout, source_address=None):
host, port = address host, port = address
@ -3158,6 +3282,7 @@ else:
compat_socket_create_connection = socket.create_connection compat_socket_create_connection = socket.create_connection
# compat_contextlib_suppress
try: try:
from contextlib import suppress as compat_contextlib_suppress from contextlib import suppress as compat_contextlib_suppress
except ImportError: except ImportError:
@ -3200,12 +3325,12 @@ except AttributeError:
# repeated .close() is OK, but just in case # repeated .close() is OK, but just in case
with compat_contextlib_suppress(EnvironmentError): with compat_contextlib_suppress(EnvironmentError):
f.close() f.close()
popen.wait() popen.wait()
# Fix https://github.com/ytdl-org/youtube-dl/issues/4223 # Fix https://github.com/ytdl-org/youtube-dl/issues/4223
# See http://bugs.python.org/issue9161 for what is broken # See http://bugs.python.org/issue9161 for what is broken
def workaround_optparse_bug9161(): def _workaround_optparse_bug9161():
op = optparse.OptionParser() op = optparse.OptionParser()
og = optparse.OptionGroup(op, 'foo') og = optparse.OptionGroup(op, 'foo')
try: try:
@ -3224,9 +3349,10 @@ def workaround_optparse_bug9161():
optparse.OptionGroup.add_option = _compat_add_option optparse.OptionGroup.add_option = _compat_add_option
if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3 # compat_shutil_get_terminal_size
compat_get_terminal_size = shutil.get_terminal_size try:
else: from shutil import get_terminal_size as compat_get_terminal_size # Python >= 3.3
except ImportError:
_terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines']) _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
def compat_get_terminal_size(fallback=(80, 24)): def compat_get_terminal_size(fallback=(80, 24)):
@ -3256,27 +3382,33 @@ else:
columns = _columns columns = _columns
if lines is None or lines <= 0: if lines is None or lines <= 0:
lines = _lines lines = _lines
return _terminal_size(columns, lines) return _terminal_size(columns, lines)
compat_shutil_get_terminal_size = compat_get_terminal_size
# compat_itertools_count
try:
    # Probe whether count() accepts the step keyword
    type(itertools.count(start=0, step=1))
except TypeError:  # Python 2.6 lacks step
    def compat_itertools_count(start=0, step=1):
        """Yield start, start + step, start + 2 * step, ... without end."""
        value = start
        while True:
            yield value
            value += step
else:
    compat_itertools_count = itertools.count
# compat_tokenize_tokenize
if sys.version_info >= (3, 0): if sys.version_info >= (3, 0):
from tokenize import tokenize as compat_tokenize_tokenize from tokenize import tokenize as compat_tokenize_tokenize
else: else:
from tokenize import generate_tokens as compat_tokenize_tokenize from tokenize import generate_tokens as compat_tokenize_tokenize
# compat_struct_pack, compat_struct_unpack, compat_Struct
try: try:
struct.pack('!I', 0) type(struct.pack('!I', 0))
except TypeError: except TypeError:
# In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
# See https://bugs.python.org/issue19099 # See https://bugs.python.org/issue19099
@ -3308,8 +3440,10 @@ else:
compat_Struct = struct.Struct compat_Struct = struct.Struct
# compat_map/filter() returning an iterator, supposedly the # builtins returning an iterator
# same versioning as for zip below
# compat_map, compat_filter
# supposedly the same versioning as for zip below
try: try:
from future_builtins import map as compat_map from future_builtins import map as compat_map
except ImportError: except ImportError:
@ -3326,6 +3460,7 @@ except ImportError:
except ImportError: except ImportError:
compat_filter = filter compat_filter = filter
# compat_zip
try: try:
from future_builtins import zip as compat_zip from future_builtins import zip as compat_zip
except ImportError: # not 2.6+ or is 3.x except ImportError: # not 2.6+ or is 3.x
@ -3335,6 +3470,7 @@ except ImportError: # not 2.6+ or is 3.x
compat_zip = zip compat_zip = zip
# compat_itertools_zip_longest
# method renamed between Py2/3 # method renamed between Py2/3
try: try:
from itertools import zip_longest as compat_itertools_zip_longest from itertools import zip_longest as compat_itertools_zip_longest
@ -3342,7 +3478,8 @@ except ImportError:
from itertools import izip_longest as compat_itertools_zip_longest from itertools import izip_longest as compat_itertools_zip_longest
# new class in collections # compat_collections_chain_map
# collections.ChainMap: new class
try: try:
from collections import ChainMap as compat_collections_chain_map from collections import ChainMap as compat_collections_chain_map
# Py3.3's ChainMap is deficient # Py3.3's ChainMap is deficient
@ -3398,19 +3535,22 @@ except ImportError:
def new_child(self, m=None, **kwargs):
    """Return a new chain map with `m` in front of the current maps.

    m:      mapping to use as the first map; a new dict if omitted.
            The object passed in is used as is, even when it is empty,
            so later writes to the child are visible through it.
    kwargs: extra items merged into the first map.

    The result has the same type as `self`.
    """
    # Test for None rather than truthiness: an empty mapping supplied by
    # the caller must not be swapped for an unrelated new dict
    if m is None:
        m = {}
    if kwargs:
        m.update(kwargs)
    # support inheritance !
    return type(self)(m, *self.maps)
@property
def parents(self):
    """A new chain map of the same type holding every map but the first."""
    outer_maps = self.maps[1:]
    return type(self)(*outer_maps)
# compat_re_Pattern, compat_re_Match
# Pythons disagree on the type of a pattern (RegexObject, _sre.SRE_Pattern, Pattern, ...?) # Pythons disagree on the type of a pattern (RegexObject, _sre.SRE_Pattern, Pattern, ...?)
compat_re_Pattern = type(re.compile('')) compat_re_Pattern = type(re.compile(''))
# and on the type of a match # and on the type of a match
compat_re_Match = type(re.match('a', 'a')) compat_re_Match = type(re.match('a', 'a'))
# compat_base64_b64decode
if sys.version_info < (3, 3): if sys.version_info < (3, 3):
def compat_b64decode(s, *args, **kwargs): def compat_b64decode(s, *args, **kwargs):
if isinstance(s, compat_str): if isinstance(s, compat_str):
@ -3422,6 +3562,7 @@ else:
compat_base64_b64decode = compat_b64decode compat_base64_b64decode = compat_b64decode
# compat_ctypes_WINFUNCTYPE
if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0): if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
# PyPy2 prior to version 5.4.0 expects byte strings as Windows function # PyPy2 prior to version 5.4.0 expects byte strings as Windows function
# names, see the original PyPy issue [1] and the youtube-dl one [2]. # names, see the original PyPy issue [1] and the youtube-dl one [2].
@ -3440,6 +3581,7 @@ else:
return ctypes.WINFUNCTYPE(*args, **kwargs) return ctypes.WINFUNCTYPE(*args, **kwargs)
# compat_open
if sys.version_info < (3, 0): if sys.version_info < (3, 0):
# open(file, mode='r', buffering=- 1, encoding=None, errors=None, newline=None, closefd=True) not: opener=None # open(file, mode='r', buffering=- 1, encoding=None, errors=None, newline=None, closefd=True) not: opener=None
def compat_open(file_, *args, **kwargs): def compat_open(file_, *args, **kwargs):
@ -3467,18 +3609,28 @@ except AttributeError:
def compat_datetime_timedelta_total_seconds(td):
    """Return the duration of timedelta `td` expressed in seconds."""
    whole_seconds = td.seconds + td.days * 24 * 3600
    total_microseconds = td.microseconds + whole_seconds * 10**6
    return total_microseconds / 10**6
# optional decompression packages # optional decompression packages
# compat_brotli
# PyPi brotli package implements 'br' Content-Encoding # PyPi brotli package implements 'br' Content-Encoding
try: try:
import brotli as compat_brotli import brotli as compat_brotli
except ImportError: except ImportError:
compat_brotli = None compat_brotli = None
# compat_ncompress
# PyPi ncompress package implements 'compress' Content-Encoding # PyPi ncompress package implements 'compress' Content-Encoding
try: try:
import ncompress as compat_ncompress import ncompress as compat_ncompress
except ImportError: except ImportError:
compat_ncompress = None compat_ncompress = None
# compat_zstandard
# PyPi zstandard package implements 'zstd' Content-Encoding (RFC 8878 7.2)
try:
import zstandard as compat_zstandard
except ImportError:
compat_zstandard = None
legacy = [ legacy = [
'compat_HTMLParseError', 'compat_HTMLParseError',
@ -3495,6 +3647,7 @@ legacy = [
'compat_getpass', 'compat_getpass',
'compat_parse_qs', 'compat_parse_qs',
'compat_realpath', 'compat_realpath',
'compat_shlex_split',
'compat_urllib_parse_parse_qs', 'compat_urllib_parse_parse_qs',
'compat_urllib_parse_unquote', 'compat_urllib_parse_unquote',
'compat_urllib_parse_unquote_plus', 'compat_urllib_parse_unquote_plus',
@ -3508,8 +3661,6 @@ legacy = [
__all__ = [ __all__ = [
'compat_html_parser_HTMLParseError',
'compat_html_parser_HTMLParser',
'compat_Struct', 'compat_Struct',
'compat_base64_b64decode', 'compat_base64_b64decode',
'compat_basestring', 'compat_basestring',
@ -3518,13 +3669,9 @@ __all__ = [
'compat_chr', 'compat_chr',
'compat_collections_abc', 'compat_collections_abc',
'compat_collections_chain_map', 'compat_collections_chain_map',
'compat_datetime_timedelta_total_seconds',
'compat_http_cookiejar',
'compat_http_cookiejar_Cookie',
'compat_http_cookies',
'compat_http_cookies_SimpleCookie',
'compat_contextlib_suppress', 'compat_contextlib_suppress',
'compat_ctypes_WINFUNCTYPE', 'compat_ctypes_WINFUNCTYPE',
'compat_datetime_timedelta_total_seconds',
'compat_etree_fromstring', 'compat_etree_fromstring',
'compat_etree_iterfind', 'compat_etree_iterfind',
'compat_filter', 'compat_filter',
@ -3533,6 +3680,12 @@ __all__ = [
'compat_getpass_getpass', 'compat_getpass_getpass',
'compat_html_entities', 'compat_html_entities',
'compat_html_entities_html5', 'compat_html_entities_html5',
'compat_html_parser_HTMLParseError',
'compat_html_parser_HTMLParser',
'compat_http_cookiejar',
'compat_http_cookiejar_Cookie',
'compat_http_cookies',
'compat_http_cookies_SimpleCookie',
'compat_http_client', 'compat_http_client',
'compat_http_server', 'compat_http_server',
'compat_input', 'compat_input',
@ -3546,6 +3699,7 @@ __all__ = [
'compat_numeric_types', 'compat_numeric_types',
'compat_open', 'compat_open',
'compat_ord', 'compat_ord',
'compat_os_makedirs',
'compat_os_name', 'compat_os_name',
'compat_os_path_expanduser', 'compat_os_path_expanduser',
'compat_os_path_realpath', 'compat_os_path_realpath',
@ -3555,7 +3709,7 @@ __all__ = [
'compat_register_utf8', 'compat_register_utf8',
'compat_setenv', 'compat_setenv',
'compat_shlex_quote', 'compat_shlex_quote',
'compat_shlex_split', 'compat_shutil_get_terminal_size',
'compat_socket_create_connection', 'compat_socket_create_connection',
'compat_str', 'compat_str',
'compat_struct_pack', 'compat_struct_pack',
@ -3575,5 +3729,5 @@ __all__ = [
'compat_xml_etree_register_namespace', 'compat_xml_etree_register_namespace',
'compat_xpath', 'compat_xpath',
'compat_zip', 'compat_zip',
'workaround_optparse_bug9161', 'compat_zstandard',
] ]

View file

@ -1,5 +1,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import logging
import os import os
import re import re
import subprocess import subprocess
@ -496,20 +497,31 @@ class FFmpegFD(ExternalFD):
# as a context manager (newer Python 3.x and compat) # as a context manager (newer Python 3.x and compat)
# Fixes "Resource Warning" in test/test_downloader_external.py # Fixes "Resource Warning" in test/test_downloader_external.py
# [1] https://devpress.csdn.net/python/62fde12d7e66823466192e48.html # [1] https://devpress.csdn.net/python/62fde12d7e66823466192e48.html
with compat_subprocess_Popen(args, stdin=subprocess.PIPE, env=env) as proc: _proc = compat_subprocess_Popen(
args,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
universal_newlines=True,
bufsize=1,
env=env,
)
ffmpeg_logger = logging.getLogger('ffmpeg')
with _proc as proc:
try: try:
for line in iter(proc.stdout.readline, ''):
ffmpeg_logger.debug(line.strip())
proc.stdout.close()
retval = proc.wait() retval = proc.wait()
except BaseException as e: except BaseException as e:
# subprocess.run would send the SIGKILL signal to ffmpeg and the if isinstance(e, KeyError) and (sys.platform != 'win32'):
# mp4 file couldn't be played, but if we ask ffmpeg to quit it process_communicate_or_kill(proc, 'q')
# produces a file that is playable (this is mostly useful for live
# streams). Note that Windows is not affected and produces playable
# files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32':
process_communicate_or_kill(proc, b'q')
else: else:
proc.kill() proc.kill()
raise raise
return retval return retval

View file

@ -32,7 +32,7 @@ class BokeCCBaseIE(InfoExtractor):
class BokeCCIE(BokeCCBaseIE): class BokeCCIE(BokeCCBaseIE):
_IE_DESC = 'CC视频' IE_DESC = 'CC视频'
_VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)' _VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
_TESTS = [{ _TESTS = [{

View file

@ -9,7 +9,7 @@ from ..utils import (
class CloudyIE(InfoExtractor): class CloudyIE(InfoExtractor):
_IE_DESC = 'cloudy.ec' IE_DESC = 'cloudy.ec'
_VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)' _VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.cloudy.ec/v/af511e2527aac', 'url': 'https://www.cloudy.ec/v/af511e2527aac',

View file

@ -422,6 +422,8 @@ class InfoExtractor(object):
_GEO_COUNTRIES = None _GEO_COUNTRIES = None
_GEO_IP_BLOCKS = None _GEO_IP_BLOCKS = None
_WORKING = True _WORKING = True
# supply this in public subclasses: used in supported sites list, etc
# IE_DESC = 'short description of IE'
def __init__(self, downloader=None): def __init__(self, downloader=None):
"""Constructor. Receives an optional downloader.""" """Constructor. Receives an optional downloader."""
@ -503,7 +505,7 @@ class InfoExtractor(object):
if not self._x_forwarded_for_ip: if not self._x_forwarded_for_ip:
# Geo bypass mechanism is explicitly disabled by user # Geo bypass mechanism is explicitly disabled by user
if not self._downloader.params.get('geo_bypass', True): if not self.get_param('geo_bypass', True):
return return
if not geo_bypass_context: if not geo_bypass_context:
@ -525,7 +527,7 @@ class InfoExtractor(object):
# Explicit IP block specified by user, use it right away # Explicit IP block specified by user, use it right away
# regardless of whether extractor is geo bypassable or not # regardless of whether extractor is geo bypassable or not
ip_block = self._downloader.params.get('geo_bypass_ip_block', None) ip_block = self.get_param('geo_bypass_ip_block', None)
# Otherwise use random IP block from geo bypass context but only # Otherwise use random IP block from geo bypass context but only
# if extractor is known as geo bypassable # if extractor is known as geo bypassable
@ -536,8 +538,8 @@ class InfoExtractor(object):
if ip_block: if ip_block:
self._x_forwarded_for_ip = GeoUtils.random_ipv4(ip_block) self._x_forwarded_for_ip = GeoUtils.random_ipv4(ip_block)
if self._downloader.params.get('verbose', False): if self.get_param('verbose', False):
self._downloader.to_screen( self.to_screen(
'[debug] Using fake IP %s as X-Forwarded-For.' '[debug] Using fake IP %s as X-Forwarded-For.'
% self._x_forwarded_for_ip) % self._x_forwarded_for_ip)
return return
@ -546,7 +548,7 @@ class InfoExtractor(object):
# Explicit country code specified by user, use it right away # Explicit country code specified by user, use it right away
# regardless of whether extractor is geo bypassable or not # regardless of whether extractor is geo bypassable or not
country = self._downloader.params.get('geo_bypass_country', None) country = self.get_param('geo_bypass_country', None)
# Otherwise use random country code from geo bypass context but # Otherwise use random country code from geo bypass context but
# only if extractor is known as geo bypassable # only if extractor is known as geo bypassable
@ -557,8 +559,8 @@ class InfoExtractor(object):
if country: if country:
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country) self._x_forwarded_for_ip = GeoUtils.random_ipv4(country)
if self._downloader.params.get('verbose', False): if self.get_param('verbose', False):
self._downloader.to_screen( self.to_screen(
'[debug] Using fake IP %s (%s) as X-Forwarded-For.' '[debug] Using fake IP %s (%s) as X-Forwarded-For.'
% (self._x_forwarded_for_ip, country.upper())) % (self._x_forwarded_for_ip, country.upper()))
@ -584,9 +586,9 @@ class InfoExtractor(object):
raise ExtractorError('An extractor error has occurred.', cause=e) raise ExtractorError('An extractor error has occurred.', cause=e)
def __maybe_fake_ip_and_retry(self, countries): def __maybe_fake_ip_and_retry(self, countries):
if (not self._downloader.params.get('geo_bypass_country', None) if (not self.get_param('geo_bypass_country', None)
and self._GEO_BYPASS and self._GEO_BYPASS
and self._downloader.params.get('geo_bypass', True) and self.get_param('geo_bypass', True)
and not self._x_forwarded_for_ip and not self._x_forwarded_for_ip
and countries): and countries):
country_code = random.choice(countries) country_code = random.choice(countries)
@ -696,7 +698,7 @@ class InfoExtractor(object):
if fatal: if fatal:
raise ExtractorError(errmsg, sys.exc_info()[2], cause=err) raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
else: else:
self._downloader.report_warning(errmsg) self.report_warning(errmsg)
return False return False
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None): def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
@ -768,11 +770,11 @@ class InfoExtractor(object):
webpage_bytes = prefix + webpage_bytes webpage_bytes = prefix + webpage_bytes
if not encoding: if not encoding:
encoding = self._guess_encoding_from_content(content_type, webpage_bytes) encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
if self._downloader.params.get('dump_intermediate_pages', False): if self.get_param('dump_intermediate_pages', False):
self.to_screen('Dumping request to ' + urlh.geturl()) self.to_screen('Dumping request to ' + urlh.geturl())
dump = base64.b64encode(webpage_bytes).decode('ascii') dump = base64.b64encode(webpage_bytes).decode('ascii')
self._downloader.to_screen(dump) self.to_screen(dump)
if self._downloader.params.get('write_pages', False): if self.get_param('write_pages', False):
basen = '%s_%s' % (video_id, urlh.geturl()) basen = '%s_%s' % (video_id, urlh.geturl())
if len(basen) > 240: if len(basen) > 240:
h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest() h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
@ -974,19 +976,9 @@ class InfoExtractor(object):
"""Print msg to screen, prefixing it with '[ie_name]'""" """Print msg to screen, prefixing it with '[ie_name]'"""
self._downloader.to_screen(self.__ie_msg(msg)) self._downloader.to_screen(self.__ie_msg(msg))
def write_debug(self, msg, only_once=False):
    """Pass a debug message on to the downloader's write_debug().

    The message is first run through this extractor's message formatter;
    `only_once` is forwarded unchanged.
    """
    formatted = self.__ie_msg(msg)
    self._downloader.write_debug(formatted, only_once=only_once)
# name, default=None, *args, **kwargs # name, default=None, *args, **kwargs
def get_param(self, name, *args, **kwargs): def get_param(self, name, *args, **kwargs):
@ -1082,7 +1074,7 @@ class InfoExtractor(object):
if mobj: if mobj:
break break
if not self._downloader.params.get('no_color') and compat_os_name != 'nt' and sys.stderr.isatty(): if not self.get_param('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
_name = '\033[0;34m%s\033[0m' % name _name = '\033[0;34m%s\033[0m' % name
else: else:
_name = name _name = name
@ -1100,7 +1092,7 @@ class InfoExtractor(object):
elif fatal: elif fatal:
raise RegexNotFoundError('Unable to extract %s' % _name) raise RegexNotFoundError('Unable to extract %s' % _name)
else: else:
self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message()) self.report_warning('unable to extract %s' % _name + bug_reports_message())
return None return None
def _search_json(self, start_pattern, string, name, video_id, **kwargs): def _search_json(self, start_pattern, string, name, video_id, **kwargs):
@ -1170,7 +1162,7 @@ class InfoExtractor(object):
username = None username = None
password = None password = None
if self._downloader.params.get('usenetrc', False): if self.get_param('usenetrc', False):
try: try:
netrc_machine = netrc_machine or self._NETRC_MACHINE netrc_machine = netrc_machine or self._NETRC_MACHINE
info = netrc.netrc().authenticators(netrc_machine) info = netrc.netrc().authenticators(netrc_machine)
@ -1181,7 +1173,7 @@ class InfoExtractor(object):
raise netrc.NetrcParseError( raise netrc.NetrcParseError(
'No authenticators for %s' % netrc_machine) 'No authenticators for %s' % netrc_machine)
except (AttributeError, IOError, netrc.NetrcParseError) as err: except (AttributeError, IOError, netrc.NetrcParseError) as err:
self._downloader.report_warning( self.report_warning(
'parsing .netrc: %s' % error_to_compat_str(err)) 'parsing .netrc: %s' % error_to_compat_str(err))
return username, password return username, password
@ -1218,10 +1210,10 @@ class InfoExtractor(object):
""" """
if self._downloader is None: if self._downloader is None:
return None return None
downloader_params = self._downloader.params
if downloader_params.get('twofactor') is not None: twofactor = self.get_param('twofactor')
return downloader_params['twofactor'] if twofactor is not None:
return twofactor
return compat_getpass('Type %s and press [Return]: ' % note) return compat_getpass('Type %s and press [Return]: ' % note)
@ -1356,7 +1348,7 @@ class InfoExtractor(object):
elif fatal: elif fatal:
raise RegexNotFoundError('Unable to extract JSON-LD') raise RegexNotFoundError('Unable to extract JSON-LD')
else: else:
self._downloader.report_warning('unable to extract JSON-LD %s' % bug_reports_message()) self.report_warning('unable to extract JSON-LD %s' % bug_reports_message())
return {} return {}
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None): def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
@ -1587,7 +1579,7 @@ class InfoExtractor(object):
if f.get('vcodec') == 'none': # audio only if f.get('vcodec') == 'none': # audio only
preference -= 50 preference -= 50
if self._downloader.params.get('prefer_free_formats'): if self.get_param('prefer_free_formats'):
ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus'] ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus']
else: else:
ORDER = ['webm', 'opus', 'ogg', 'mp3', 'aac', 'm4a'] ORDER = ['webm', 'opus', 'ogg', 'mp3', 'aac', 'm4a']
@ -1599,7 +1591,7 @@ class InfoExtractor(object):
else: else:
if f.get('acodec') == 'none': # video only if f.get('acodec') == 'none': # video only
preference -= 40 preference -= 40
if self._downloader.params.get('prefer_free_formats'): if self.get_param('prefer_free_formats'):
ORDER = ['flv', 'mp4', 'webm'] ORDER = ['flv', 'mp4', 'webm']
else: else:
ORDER = ['webm', 'flv', 'mp4'] ORDER = ['webm', 'flv', 'mp4']
@ -1665,7 +1657,7 @@ class InfoExtractor(object):
""" Either "http:" or "https:", depending on the user's preferences """ """ Either "http:" or "https:", depending on the user's preferences """
return ( return (
'http:' 'http:'
if self._downloader.params.get('prefer_insecure', False) if self.get_param('prefer_insecure', False)
else 'https:') else 'https:')
def _proto_relative_url(self, url, scheme=None): def _proto_relative_url(self, url, scheme=None):
@ -3197,7 +3189,7 @@ class InfoExtractor(object):
if fatal: if fatal:
raise ExtractorError(msg) raise ExtractorError(msg)
else: else:
self._downloader.report_warning(msg) self.report_warning(msg)
return res return res
def _float(self, v, name, fatal=False, **kwargs): def _float(self, v, name, fatal=False, **kwargs):
@ -3207,7 +3199,7 @@ class InfoExtractor(object):
if fatal: if fatal:
raise ExtractorError(msg) raise ExtractorError(msg)
else: else:
self._downloader.report_warning(msg) self.report_warning(msg)
return res return res
def _set_cookie(self, domain, name, value, expire_time=None, port=None, def _set_cookie(self, domain, name, value, expire_time=None, port=None,
@ -3216,12 +3208,12 @@ class InfoExtractor(object):
0, name, value, port, port is not None, domain, True, 0, name, value, port, port is not None, domain, True,
domain.startswith('.'), path, True, secure, expire_time, domain.startswith('.'), path, True, secure, expire_time,
discard, None, None, rest) discard, None, None, rest)
self._downloader.cookiejar.set_cookie(cookie) self.cookiejar.set_cookie(cookie)
def _get_cookies(self, url): def _get_cookies(self, url):
""" Return a compat_cookies_SimpleCookie with the cookies for the url """ """ Return a compat_cookies_SimpleCookie with the cookies for the url """
req = sanitized_Request(url) req = sanitized_Request(url)
self._downloader.cookiejar.add_cookie_header(req) self.cookiejar.add_cookie_header(req)
return compat_cookies_SimpleCookie(req.get_header('Cookie')) return compat_cookies_SimpleCookie(req.get_header('Cookie'))
def _apply_first_set_cookie_header(self, url_handle, cookie): def _apply_first_set_cookie_header(self, url_handle, cookie):
@ -3281,8 +3273,8 @@ class InfoExtractor(object):
return not any_restricted return not any_restricted
def extract_subtitles(self, *args, **kwargs): def extract_subtitles(self, *args, **kwargs):
if (self._downloader.params.get('writesubtitles', False) if (self.get_param('writesubtitles', False)
or self._downloader.params.get('listsubtitles')): or self.get_param('listsubtitles')):
return self._get_subtitles(*args, **kwargs) return self._get_subtitles(*args, **kwargs)
return {} return {}
@ -3303,7 +3295,11 @@ class InfoExtractor(object):
""" Merge subtitle dictionaries, language by language. """ """ Merge subtitle dictionaries, language by language. """
# ..., * , target=None # ..., * , target=None
target = kwargs.get('target') or dict(subtitle_dict1) target = kwargs.get('target')
if target is None:
target = dict(subtitle_dict1)
else:
subtitle_dicts = (subtitle_dict1,) + subtitle_dicts
for subtitle_dict in subtitle_dicts: for subtitle_dict in subtitle_dicts:
for lang in subtitle_dict: for lang in subtitle_dict:
@ -3311,8 +3307,8 @@ class InfoExtractor(object):
return target return target
def extract_automatic_captions(self, *args, **kwargs): def extract_automatic_captions(self, *args, **kwargs):
if (self._downloader.params.get('writeautomaticsub', False) if (self.get_param('writeautomaticsub', False)
or self._downloader.params.get('listsubtitles')): or self.get_param('listsubtitles')):
return self._get_automatic_captions(*args, **kwargs) return self._get_automatic_captions(*args, **kwargs)
return {} return {}
@ -3320,9 +3316,9 @@ class InfoExtractor(object):
raise NotImplementedError('This method must be implemented by subclasses') raise NotImplementedError('This method must be implemented by subclasses')
def mark_watched(self, *args, **kwargs): def mark_watched(self, *args, **kwargs):
if (self._downloader.params.get('mark_watched', False) if (self.get_param('mark_watched', False)
and (self._get_login_info()[0] is not None and (self._get_login_info()[0] is not None
or self._downloader.params.get('cookiefile') is not None)): or self.get_param('cookiefile') is not None)):
self._mark_watched(*args, **kwargs) self._mark_watched(*args, **kwargs)
def _mark_watched(self, *args, **kwargs): def _mark_watched(self, *args, **kwargs):
@ -3330,7 +3326,7 @@ class InfoExtractor(object):
def geo_verification_headers(self): def geo_verification_headers(self):
headers = {} headers = {}
geo_verification_proxy = self._downloader.params.get('geo_verification_proxy') geo_verification_proxy = self.get_param('geo_verification_proxy')
if geo_verification_proxy: if geo_verification_proxy:
headers['Ytdl-request-proxy'] = geo_verification_proxy headers['Ytdl-request-proxy'] = geo_verification_proxy
return headers return headers

View file

@ -35,15 +35,6 @@ from ..utils import (
class ITVBaseIE(InfoExtractor): class ITVBaseIE(InfoExtractor):
def _search_nextjs_data(self, webpage, video_id, **kw):
transform_source = kw.pop('transform_source', None)
fatal = kw.pop('fatal', True)
return self._parse_json(
self._search_regex(
r'''<script\b[^>]+\bid=('|")__NEXT_DATA__\1[^>]*>(?P<js>[^<]+)</script>''',
webpage, 'next.js data', group='js', fatal=fatal, **kw),
video_id, transform_source=transform_source, fatal=fatal)
def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True): def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True):
if errnote is False: if errnote is False:
return False return False
@ -109,7 +100,9 @@ class ITVBaseIE(InfoExtractor):
class ITVIE(ITVBaseIE): class ITVIE(ITVBaseIE):
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)' _VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)'
_IE_DESC = 'ITVX' IE_DESC = 'ITVX'
_WORKING = False
_TESTS = [{ _TESTS = [{
'note': 'Hub URLs redirect to ITVX', 'note': 'Hub URLs redirect to ITVX',
'url': 'https://www.itv.com/hub/liar/2a4547a0012', 'url': 'https://www.itv.com/hub/liar/2a4547a0012',
@ -270,7 +263,7 @@ class ITVIE(ITVBaseIE):
'ext': determine_ext(href, 'vtt'), 'ext': determine_ext(href, 'vtt'),
}) })
next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default='{}') next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default={})
video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict)[0] or {}) video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict)[0] or {})
title = traverse_obj(video_data, 'headerTitle', 'episodeTitle') title = traverse_obj(video_data, 'headerTitle', 'episodeTitle')
info = self._og_extract(webpage, require_title=not title) info = self._og_extract(webpage, require_title=not title)
@ -323,7 +316,7 @@ class ITVIE(ITVBaseIE):
class ITVBTCCIE(ITVBaseIE): class ITVBTCCIE(ITVBaseIE):
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)'
_IE_DESC = 'ITV articles: News, British Touring Car Championship' IE_DESC = 'ITV articles: News, British Touring Car Championship'
_TESTS = [{ _TESTS = [{
'note': 'British Touring Car Championship', 'note': 'British Touring Car Championship',
'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch', 'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch',

View file

@ -47,7 +47,7 @@ class SenateISVPIE(InfoExtractor):
['vetaff', '76462', 'http://vetaff-f.akamaihd.net'], ['vetaff', '76462', 'http://vetaff-f.akamaihd.net'],
['arch', '', 'http://ussenate-f.akamaihd.net/'] ['arch', '', 'http://ussenate-f.akamaihd.net/']
] ]
_IE_NAME = 'senate.gov' IE_NAME = 'senate.gov'
_VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)' _VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png', 'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',

View file

@ -27,6 +27,7 @@ from ..compat import (
) )
from ..jsinterp import JSInterpreter from ..jsinterp import JSInterpreter
from ..utils import ( from ..utils import (
bug_reports_message,
clean_html, clean_html,
dict_get, dict_get,
error_to_compat_str, error_to_compat_str,
@ -48,6 +49,7 @@ from ..utils import (
parse_duration, parse_duration,
parse_qs, parse_qs,
qualities, qualities,
remove_end,
remove_start, remove_start,
smuggle_url, smuggle_url,
str_or_none, str_or_none,
@ -65,6 +67,7 @@ from ..utils import (
url_or_none, url_or_none,
urlencode_postdata, urlencode_postdata,
urljoin, urljoin,
variadic,
) )
@ -89,12 +92,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'IOS', 'clientName': 'IOS',
'clientVersion': '19.45.4', 'clientVersion': '20.10.4',
'deviceMake': 'Apple', 'deviceMake': 'Apple',
'deviceModel': 'iPhone16,2', 'deviceModel': 'iPhone16,2',
'userAgent': 'com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)', 'userAgent': 'com.google.ios.youtube/20.10.4 (iPhone16,2; U; CPU iOS 18_3_2 like Mac OS X;)',
'osName': 'iPhone', 'osName': 'iPhone',
'osVersion': '18.1.0.22B83', 'osVersion': '18.3.2.22D82',
}, },
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 5, 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
@ -107,7 +110,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'MWEB', 'clientName': 'MWEB',
'clientVersion': '2.20241202.07.00', 'clientVersion': '2.20250311.03.00',
# mweb previously did not require PO Token with this UA # mweb previously did not require PO Token with this UA
'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)', 'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)',
}, },
@ -120,7 +123,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'TVHTML5', 'clientName': 'TVHTML5',
'clientVersion': '7.20241201.18.00', 'clientVersion': '7.20250312.16.00',
'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version',
}, },
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 7, 'INNERTUBE_CONTEXT_CLIENT_NAME': 7,
@ -130,7 +134,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'WEB', 'clientName': 'WEB',
'clientVersion': '2.20241126.01.00', 'clientVersion': '2.20250312.04.00',
}, },
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 1, 'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
@ -339,14 +343,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
if not self._login(): if not self._login():
return return
_DEFAULT_API_DATA = { _DEFAULT_API_DATA = {'context': _INNERTUBE_CLIENTS['web']['INNERTUBE_CONTEXT']}
'context': {
'client': {
'clientName': 'WEB',
'clientVersion': '2.20201021.03.00',
},
},
}
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;' _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;' _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
@ -460,6 +457,26 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'uploader': uploader, 'uploader': uploader,
} }
@staticmethod
def _extract_thumbnails(data, *path_list, **kw_final_key):
    """
    Extract thumbnail entries from one or more places in `data`
    @param data: object handed to traverse_obj() for searching
    @param path_list: paths to each level that contains the thumbnails
                      key; if none is given, `data` itself is that level
    @param final_key: (keyword-only) name of that key, default 'thumbnails'
    @returns whatever traverse_obj() yields for the combined paths, each
             entry being mapped to its 'url', 'height' and 'width' fields
    """
    thumbs_key = kw_final_key.get('final_key', 'thumbnails')

    def drop_maxres_query(u):
        # Sometimes youtube gives a wrong thumbnail URL. See:
        # https://github.com/yt-dlp/yt-dlp/issues/233
        # https://github.com/ytdl-org/youtube-dl/issues/28023
        if u and 'maxresdefault' in u:
            return update_url(u, query=None)
        return u

    def require_url(thumb):
        # an entry that ended up without a url is replaced by None
        return thumb if thumb.get('url') else None

    # one branch per requested path (or a single empty path), each
    # extended with the thumbnails key and Ellipsis
    branches = tuple(
        variadic(path) + (thumbs_key, Ellipsis)
        for path in path_list or [()])
    fields = {
        'url': ('url', T(url_or_none), T(drop_maxres_query)),
        'height': ('height', T(int_or_none)),
        'width': ('width', T(int_or_none)),
    }
    return traverse_obj(data, (branches, fields, T(require_url)))
def _search_results(self, query, params): def _search_results(self, query, params):
data = { data = {
'context': { 'context': {
@ -474,11 +491,15 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
data['params'] = params data['params'] = params
for page_num in itertools.count(1): for page_num in itertools.count(1):
search = self._download_json( search = self._download_json(
'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', 'https://www.youtube.com/youtubei/v1/search',
video_id='query "%s"' % query, video_id='query "%s"' % query,
note='Downloading page %s' % page_num, note='Downloading page %s' % page_num,
errnote='Unable to download API page', fatal=False, errnote='Unable to download API page', fatal=False,
data=json.dumps(data).encode('utf8'), data=json.dumps(data).encode('utf8'),
query={
# 'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
'prettyPrint': 'false',
},
headers={'content-type': 'application/json'}) headers={'content-type': 'application/json'})
if not search: if not search:
break break
@ -669,9 +690,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'invidious': '|'.join(_INVIDIOUS_SITES), 'invidious': '|'.join(_INVIDIOUS_SITES),
} }
_PLAYER_INFO_RE = ( _PLAYER_INFO_RE = (
r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player', r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/(?:tv-)?player',
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$', r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias(?:_tce)?\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$', r'\b(?P<id>vfl[a-zA-Z0-9_-]{6,})\b.*?\.js$',
) )
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt') _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
@ -1564,6 +1585,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
} }
# (variant label, player JS path) pairs, where the path is what remains of
# the player URL path after the leading '/s/player/<player id>/'.
# _player_js_cache_key() first looks for an exact match of that remainder,
# then retries with 'en_US' in the listed path standing for any `\w+`
# run; the matching label becomes part of the cache key.
_PLAYER_JS_VARIANT_MAP = (
('main', 'player_ias.vflset/en_US/base.js'),
('tce', 'player_ias_tce.vflset/en_US/base.js'),
('tv', 'tv-player-ias.vflset/tv-player-ias.js'),
('tv_es6', 'tv-player-es6.vflset/tv-player-es6.js'),
('phone', 'player-plasma-ias-phone-en_US.vflset/base.js'),
('tablet', 'player-plasma-ias-tablet-en_US.vflset/base.js'),
)
@classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):
if parse_qs(url).get('list', [None])[0]: if parse_qs(url).get('list', [None])[0]:
@ -1603,46 +1633,97 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
""" Return a string representation of a signature """ """ Return a string representation of a signature """
return '.'.join(compat_str(len(part)) for part in example_sig.split('.')) return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
@classmethod def _extract_player_info(self, player_url):
def _extract_player_info(cls, player_url): try:
for player_re in cls._PLAYER_INFO_RE: return self._search_regex(
id_m = re.search(player_re, player_url) self._PLAYER_INFO_RE, player_url, 'player info', group='id')
if id_m: except ExtractorError as e:
break raise ExtractorError(
else: 'Cannot identify player %r' % (player_url,), cause=e)
raise ExtractorError('Cannot identify player %r' % player_url)
return id_m.group('id')
def _load_player(self, video_id, player_url, fatal=True, player_id=None): def _player_js_cache_key(self, player_url, extra_id=None, _cache={}):
if not player_id: if player_url not in _cache:
player_id = self._extract_player_info(player_url) player_id = self._extract_player_info(player_url)
if player_id not in self._code_cache: player_path = remove_start(
compat_urllib_parse.urlparse(player_url).path,
'/s/player/{0}/'.format(player_id))
variant = next((k for k, v in self._PLAYER_JS_VARIANT_MAP
if v == player_path), None)
if not variant:
variant = next(
(k for k, v in self._PLAYER_JS_VARIANT_MAP
if re.match(re.escape(v).replace('en_US', r'\w+') + '$', player_path)),
None)
if not variant:
self.write_debug(
'Unable to determine player JS variant\n'
' player = {0}'.format(player_url), only_once=True)
variant = re.sub(r'[^a-zA-Z0-9]', '_', remove_end(player_path, '.js'))
_cache[player_url] = join_nonempty(player_id, variant)
if extra_id:
extra_id = '-'.join((_cache[player_url], extra_id))
assert os.path.basename(extra_id) == extra_id
return extra_id
return _cache[player_url]
def _load_player(self, video_id, player_url, fatal=True):
player_js_key = self._player_js_cache_key(player_url)
if player_js_key not in self._code_cache:
code = self._download_webpage( code = self._download_webpage(
player_url, video_id, fatal=fatal, player_url, video_id, fatal=fatal,
note='Downloading player ' + player_id, note='Downloading player {0}'.format(player_js_key),
errnote='Download of %s failed' % player_url) errnote='Download of {0} failed'.format(player_url))
if code: if code:
self._code_cache[player_id] = code self._code_cache[player_js_key] = code
return self._code_cache[player_id] if fatal else self._code_cache.get(player_id) return self._code_cache.get(player_js_key)
def _load_player_data_from_cache(self, name, player_url, extra_id=None):
    """
    Fetch cached data for a player, trying memory before the stored cache
    @param name: data kind; the cache section is 'youtube-<name>'
    @param player_url: player JS URL from which the cache key is derived
    @param extra_id: optional suffix passed on to _player_js_cache_key()
    @returns the cached data, or the falsy result of the cache load if
             nothing usable was found
    """
    section = 'youtube-{0}'.format(name)
    key = self._player_js_cache_key(player_url, extra_id)
    cache_id = (section, key)

    cached = self._player_cache.get(cache_id)
    if not cached:
        # not held in memory: consult self.cache and, on success,
        # remember the result for later calls
        cached = self.cache.load(section, key, min_ver='2025.04.07')
        if cached:
            self._player_cache[cache_id] = cached
    return cached
def _store_player_data_to_cache(self, name, player_url, data, extra_id=None):
    """
    Save data for a player in both self.cache and the in-memory cache
    Does nothing if the in-memory cache already has an entry for the key.
    @param name: data kind; the cache section is 'youtube-<name>'
    @param player_url: player JS URL from which the cache key is derived
    @param data: value to store
    @param extra_id: optional suffix passed on to _player_js_cache_key()
    """
    section = 'youtube-{0}'.format(name)
    key = self._player_js_cache_key(player_url, extra_id)
    cache_id = (section, key)

    if cache_id in self._player_cache:
        return
    self.cache.store(section, key, data)
    self._player_cache[cache_id] = data
def _remove_player_data_from_cache(self, name, player_url, extra_id=None):
    """
    Forget data for a player that is held in the in-memory cache
    The entry is cleared from self.cache and dropped from memory; a key
    that is not in the in-memory cache is left alone.
    @param name: data kind; the cache section is 'youtube-<name>'
    @param player_url: player JS URL from which the cache key is derived
    @param extra_id: optional suffix passed on to _player_js_cache_key()
    """
    section = 'youtube-{0}'.format(name)
    key = self._player_js_cache_key(player_url, extra_id)
    cache_id = (section, key)

    if cache_id not in self._player_cache:
        return
    self.cache.clear(section, key)
    self._player_cache.pop(cache_id, None)
def _extract_signature_function(self, video_id, player_url, example_sig): def _extract_signature_function(self, video_id, player_url, example_sig):
player_id = self._extract_player_info(player_url) # player_id = self._extract_player_info(player_url)
# Read from filesystem cache # Read from filesystem cache
func_id = 'js_{0}_{1}'.format( extra_id = self._signature_cache_id(example_sig)
player_id, self._signature_cache_id(example_sig)) self.write_debug('Extracting signature function {0}-{1}'.format(player_url, extra_id))
assert os.path.basename(func_id) == func_id cache_spec, code = self._load_player_data_from_cache(
'sigfuncs', player_url, extra_id=extra_id), None
self.write_debug('Extracting signature function {0}'.format(func_id))
cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
if not cache_spec: if not cache_spec:
code = self._load_player(video_id, player_url, player_id) code = self._load_player(video_id, player_url)
if code: if code:
res = self._parse_sig_js(code) res = self._parse_sig_js(code)
test_string = ''.join(map(compat_chr, range(len(example_sig)))) test_string = ''.join(map(compat_chr, range(len(example_sig))))
cache_spec = [ord(c) for c in res(test_string)] cache_spec = [ord(c) for c in res(test_string)]
self.cache.store('youtube-sigfuncs', func_id, cache_spec) self._store_player_data_to_cache(
'sigfuncs', player_url, cache_spec, extra_id=extra_id)
else:
self.report_warning(
'Failed to compute signature function {0}-{1}'.format(
player_url, extra_id))
return lambda s: ''.join(s[i] for i in cache_spec) return lambda s: ''.join(s[i] for i in cache_spec)
@ -1688,6 +1769,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
' return %s\n') % (signature_id_tuple, expr_code) ' return %s\n') % (signature_id_tuple, expr_code)
self.to_screen('Extracted signature function:\n' + code) self.to_screen('Extracted signature function:\n' + code)
def _extract_sig_fn(self, jsi, funcname):
    """
    Get the code of JS function `funcname`, with any helper `var` prepended
    @param jsi: interpreter object providing `code` and
                extract_function_code()
    @param funcname: name of the function to extract
    @returns the value of jsi.extract_function_code(funcname); if a helper
             declaration was found, its element [1] is replaced by the
             declaration joined to the original element with ';\\n'
    """
    # Verbose-mode pattern (text kept exactly as is, hence flush left):
    # after a single-quoted string statement, capture a declaration
    # `var NAME = "...".split("...")` or `var NAME = ['...', ...]`
    # that is followed by `,` or `;`
    helper_var_re = r'''(?x)
(?:\*/|\{|\n|^)\s*(?:'[^']+'\s*;\s*)
(var\s*[\w$]+\s*=\s*(?:
('|")(?:\\\2|(?!\2).)+\2\s*\.\s*split\(\s*('|")\W+\3\s*\)|
\[\s*(?:('|")(?:\\\4|(?!\4).)*\4\s*(?:(?=\])|,\s*))+\]
))(?=\s*[,;])
'''
    helper_var = self._search_regex(
        helper_var_re, jsi.code, 'useful values', default='')

    extracted = jsi.extract_function_code(funcname)
    if not helper_var:
        return extracted
    return (extracted[0], ';\n'.join((helper_var, extracted[1])))
def _parse_sig_js(self, jscode): def _parse_sig_js(self, jscode):
# Examples where `sig` is funcname: # Examples where `sig` is funcname:
# sig=function(a){a=a.split(""); ... ;return a.join("")}; # sig=function(a){a=a.split(""); ... ;return a.join("")};
@ -1713,8 +1811,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
jscode, 'Initial JS player signature function name', group='sig') jscode, 'Initial JS player signature function name', group='sig')
jsi = JSInterpreter(jscode) jsi = JSInterpreter(jscode)
initial_function = jsi.extract_function(funcname)
return lambda s: initial_function([s]) initial_function = self._extract_sig_fn(jsi, funcname)
func = jsi.extract_function_from_code(*initial_function)
return lambda s: func([s])
def _cached(self, func, *cache_id): def _cached(self, func, *cache_id):
def inner(*args, **kwargs): def inner(*args, **kwargs):
@ -1774,6 +1876,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return ret return ret
def _extract_n_function_name(self, jscode): def _extract_n_function_name(self, jscode):
func_name, idx = None, None
def generic_n_function_search(func_name=None):
return self._search_regex(
r'''(?xs)
(?:(?<=[^\w$])|^) # instead of \b, which ignores $
(?P<name>%s)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\)
\s*\{(?:(?!};).)+?(?:
["']enhanced_except_ |
return\s*(?P<q>"|')[a-zA-Z\d-]+_w8_(?P=q)\s*\+\s*[\w$]+
)
''' % (func_name or r'(?!\d)[a-zA-Z\d_$]+',), jscode,
'Initial JS player n function name', group='name',
default=None if func_name else NO_DEFAULT)
# these special cases are redundant and probably obsolete (2025-04):
# they make the tests run ~10% faster without fallback warnings
r"""
func_name, idx = self._search_regex( func_name, idx = self._search_regex(
# (y=NuD(),Mw(k),q=k.Z[y]||null)&&(q=narray[idx](q),k.set(y,q),k.V||NuD(''))}}; # (y=NuD(),Mw(k),q=k.Z[y]||null)&&(q=narray[idx](q),k.set(y,q),k.V||NuD(''))}};
# (R="nn"[+J.Z],mW(J),N=J.K[R]||null)&&(N=narray[idx](N),J.set(R,N))}}; # (R="nn"[+J.Z],mW(J),N=J.K[R]||null)&&(N=narray[idx](N),J.set(R,N))}};
@ -1800,41 +1920,59 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
\(\s*[\w$]+\s*\) \(\s*[\w$]+\s*\)
''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'), ''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'),
default=(None, None)) default=(None, None))
"""
if not func_name:
# nfunc=function(x){...}|function nfunc(x); ...
# ... var y=[nfunc]|y[idx]=nfunc);
# obvious REs hang, so use a two-stage tactic
for m in re.finditer(r'''(?x)
[\n;]var\s(?:(?:(?!,).)+,|\s)*?(?!\d)[\w$]+(?:\[(?P<idx>\d+)\])?\s*=\s*
(?(idx)|\[\s*)(?P<nfunc>(?!\d)[\w$]+)(?(idx)|\s*\])
\s*?[;\n]
''', jscode):
fn = self._search_regex(
r'[;,]\s*(function\s+)?({0})(?(1)|\s*=\s*function)\s*\((?!\d)[\w$]+\)\s*\{1}(?!\s*return\s)'.format(
re.escape(m.group('nfunc')), '{'),
jscode, 'Initial JS player n function name (2)', group=2, default=None)
if fn:
func_name = fn
idx = m.group('idx')
if generic_n_function_search(func_name):
# don't look any further
break
# thx bashonly: yt-dlp/yt-dlp/pull/10611 # thx bashonly: yt-dlp/yt-dlp/pull/10611
if not func_name: if not func_name:
self.report_warning('Falling back to generic n function search') self.report_warning('Falling back to generic n function search', only_once=True)
return self._search_regex( return generic_n_function_search()
r'''(?xs)
(?:(?<=[^\w$])|^) # instead of \b, which ignores $
(?P<name>(?!\d)[a-zA-Z\d_$]+)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\)
\s*\{(?:(?!};).)+?(?:
["']enhanced_except_ |
return\s*(?P<q>"|')[a-zA-Z\d-]+_w8_(?P=q)\s*\+\s*[\w$]+
)
''', jscode, 'Initial JS player n function name', group='name')
if not idx: if not idx:
return func_name return func_name
return self._search_json( return self._search_json(
r'var\s+{0}\s*='.format(re.escape(func_name)), jscode, r'(?<![\w-])var\s(?:(?:(?!,).)+,|\s)*?{0}\s*='.format(re.escape(func_name)), jscode,
'Initial JS player n function list ({0}.{1})'.format(func_name, idx), 'Initial JS player n function list ({0}.{1})'.format(func_name, idx),
func_name, contains_pattern=r'\[[\s\S]+\]', end_pattern='[,;]', func_name, contains_pattern=r'\[.+\]', end_pattern='[,;]',
transform_source=js_to_json)[int(idx)] transform_source=js_to_json)[int(idx)]
def _extract_n_function_code(self, video_id, player_url): def _extract_n_function_code(self, video_id, player_url):
player_id = self._extract_player_info(player_url) player_id = self._extract_player_info(player_url)
func_code = self.cache.load('youtube-nsig', player_id) func_code = self._load_player_data_from_cache('nsig', player_url)
jscode = func_code or self._load_player(video_id, player_url) jscode = func_code or self._load_player(video_id, player_url)
jsi = JSInterpreter(jscode) jsi = JSInterpreter(jscode)
if func_code: if func_code:
return jsi, player_id, func_code return jsi, player_id, func_code
func_name = self._extract_n_function_name(jscode) return self._extract_n_function_code_jsi(video_id, jsi, player_id, player_url)
func_code = jsi.extract_function_code(func_name) def _extract_n_function_code_jsi(self, video_id, jsi, player_id=None, player_url=None):
func_name = self._extract_n_function_name(jsi.code)
self.cache.store('youtube-nsig', player_id, func_code) func_code = self._extract_sig_fn(jsi, func_name)
if player_url:
self._store_player_data_to_cache('nsig', player_url, func_code)
return jsi, player_id, func_code return jsi, player_id, func_code
def _extract_n_function_from_code(self, jsi, func_code): def _extract_n_function_from_code(self, jsi, func_code):
@ -1867,7 +2005,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
n_param = n_param[-1] n_param = n_param[-1]
n_response = decrypt_nsig(n_param)(n_param, video_id, player_url) n_response = decrypt_nsig(n_param)(n_param, video_id, player_url)
if n_response is None: if n_response is None:
# give up if descrambling failed # give up and forget cached data if descrambling failed
self._remove_player_data_from_cache('nsig', player_url)
break break
fmt['url'] = update_url_query(fmt['url'], {'n': n_response}) fmt['url'] = update_url_query(fmt['url'], {'n': n_response})
@ -1878,18 +2017,28 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
Required to tell API what sig/player version is in use. Required to tell API what sig/player version is in use.
""" """
sts = traverse_obj(ytcfg, 'STS', expected_type=int) sts = traverse_obj(ytcfg, 'STS', expected_type=int)
if not sts: if sts:
# Attempt to extract from player return sts
if player_url is None:
error_msg = 'Cannot extract signature timestamp without player_url.' if not player_url:
if fatal: error_msg = 'Cannot extract signature timestamp without player url'
raise ExtractorError(error_msg) if fatal:
self.report_warning(error_msg) raise ExtractorError(error_msg)
return self.report_warning(error_msg)
code = self._load_player(video_id, player_url, fatal=fatal) return None
sts = int_or_none(self._search_regex(
r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code or '', sts = self._load_player_data_from_cache('sts', player_url)
'JS player signature timestamp', group='sts', fatal=fatal)) if sts:
return sts
# Attempt to extract from player
code = self._load_player(video_id, player_url, fatal=fatal)
sts = int_or_none(self._search_regex(
r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code or '',
'JS player signature timestamp', group='sts', fatal=fatal))
if sts:
self._store_player_data_to_cache('sts', player_url, sts)
return sts return sts
def _mark_watched(self, video_id, player_response): def _mark_watched(self, video_id, player_response):
@ -2103,7 +2252,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
video_details = merge_dicts(*traverse_obj( video_details = merge_dicts(*traverse_obj(
(player_response, api_player_response), (player_response, api_player_response),
(Ellipsis, 'videoDetails', T(dict)))) (Ellipsis, 'videoDetails', T(dict))))
player_response.update(api_player_response or {}) player_response.update(filter_dict(
api_player_response or {}, cndn=lambda k, _: k != 'captions'))
player_response['videoDetails'] = video_details player_response['videoDetails'] = video_details
def is_agegated(playability): def is_agegated(playability):
@ -2533,8 +2683,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
} }
pctr = traverse_obj( pctr = traverse_obj(
player_response, (player_response, api_player_response),
('captions', 'playerCaptionsTracklistRenderer', T(dict))) (Ellipsis, 'captions', 'playerCaptionsTracklistRenderer', T(dict)))
if pctr: if pctr:
def process_language(container, base_url, lang_code, query): def process_language(container, base_url, lang_code, query):
lang_subs = [] lang_subs = []
@ -2551,20 +2701,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def process_subtitles(): def process_subtitles():
subtitles = {} subtitles = {}
for caption_track in traverse_obj(pctr, ( for caption_track in traverse_obj(pctr, (
'captionTracks', lambda _, v: v.get('baseUrl'))): Ellipsis, 'captionTracks', lambda _, v: (
v.get('baseUrl') and v.get('languageCode')))):
base_url = self._yt_urljoin(caption_track['baseUrl']) base_url = self._yt_urljoin(caption_track['baseUrl'])
if not base_url: if not base_url:
continue continue
lang_code = caption_track['languageCode']
if caption_track.get('kind') != 'asr': if caption_track.get('kind') != 'asr':
lang_code = caption_track.get('languageCode')
if not lang_code:
continue
process_language( process_language(
subtitles, base_url, lang_code, {}) subtitles, base_url, lang_code, {})
continue continue
automatic_captions = {} automatic_captions = {}
process_language(
automatic_captions, base_url, lang_code, {})
for translation_language in traverse_obj(pctr, ( for translation_language in traverse_obj(pctr, (
'translationLanguages', lambda _, v: v.get('languageCode'))): Ellipsis, 'translationLanguages', lambda _, v: v.get('languageCode'))):
translation_language_code = translation_language['languageCode'] translation_language_code = translation_language['languageCode']
process_language( process_language(
automatic_captions, base_url, translation_language_code, automatic_captions, base_url, translation_language_code,
@ -3183,8 +3334,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
expected_type=txt_or_none) expected_type=txt_or_none)
def _grid_entries(self, grid_renderer): def _grid_entries(self, grid_renderer):
for item in grid_renderer['items']: for item in traverse_obj(grid_renderer, ('items', Ellipsis, T(dict))):
if not isinstance(item, dict): lockup_view_model = traverse_obj(item, ('lockupViewModel', T(dict)))
if lockup_view_model:
entry = self._extract_lockup_view_model(lockup_view_model)
if entry:
yield entry
continue continue
renderer = self._extract_grid_item_renderer(item) renderer = self._extract_grid_item_renderer(item)
if not isinstance(renderer, dict): if not isinstance(renderer, dict):
@ -3268,6 +3423,39 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
continue continue
yield self._extract_video(renderer) yield self._extract_video(renderer)
def _extract_lockup_view_model(self, view_model):
content_id = view_model.get('contentId')
if not content_id:
return
content_type = view_model.get('contentType')
if content_type not in ('LOCKUP_CONTENT_TYPE_PLAYLIST', 'LOCKUP_CONTENT_TYPE_PODCAST'):
self.report_warning(
'Unsupported lockup view model content type "{0}"{1}'.format(content_type, bug_reports_message()), only_once=True)
return
return merge_dicts(self.url_result(
update_url_query('https://www.youtube.com/playlist', {'list': content_id}),
ie=YoutubeTabIE.ie_key(), video_id=content_id), {
'title': traverse_obj(view_model, (
'metadata', 'lockupMetadataViewModel', 'title', 'content', T(compat_str))),
'thumbnails': self._extract_thumbnails(view_model, (
'contentImage', 'collectionThumbnailViewModel', 'primaryThumbnail',
'thumbnailViewModel', 'image'), final_key='sources'),
})
def _extract_shorts_lockup_view_model(self, view_model):
content_id = traverse_obj(view_model, (
'onTap', 'innertubeCommand', 'reelWatchEndpoint', 'videoId',
T(lambda v: v if YoutubeIE.suitable(v) else None)))
if not content_id:
return
return merge_dicts(self.url_result(
content_id, ie=YoutubeIE.ie_key(), video_id=content_id), {
'title': traverse_obj(view_model, (
'overlayMetadata', 'primaryText', 'content', T(compat_str))),
'thumbnails': self._extract_thumbnails(
view_model, 'thumbnail', final_key='sources'),
})
def _video_entry(self, video_renderer): def _video_entry(self, video_renderer):
video_id = video_renderer.get('videoId') video_id = video_renderer.get('videoId')
if video_id: if video_id:
@ -3314,10 +3502,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
yield entry yield entry
def _rich_grid_entries(self, contents): def _rich_grid_entries(self, contents):
for content in contents: for content in traverse_obj(
content = traverse_obj( contents, (Ellipsis, 'richItemRenderer', 'content'),
content, ('richItemRenderer', 'content'), expected_type=dict):
expected_type=dict) or {}
video_renderer = traverse_obj( video_renderer = traverse_obj(
content, 'videoRenderer', 'reelItemRenderer', content, 'videoRenderer', 'reelItemRenderer',
expected_type=dict) expected_type=dict)
@ -3325,6 +3512,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
entry = self._video_entry(video_renderer) entry = self._video_entry(video_renderer)
if entry: if entry:
yield entry yield entry
# shorts item
shorts_lockup_view_model = content.get('shortsLockupViewModel')
if shorts_lockup_view_model:
entry = self._extract_shorts_lockup_view_model(shorts_lockup_view_model)
if entry:
yield entry
# playlist # playlist
renderer = traverse_obj( renderer = traverse_obj(
content, 'playlistRenderer', expected_type=dict) or {} content, 'playlistRenderer', expected_type=dict) or {}
@ -3363,23 +3556,15 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
next_continuation = cls._extract_next_continuation_data(renderer) next_continuation = cls._extract_next_continuation_data(renderer)
if next_continuation: if next_continuation:
return next_continuation return next_continuation
contents = [] for command in traverse_obj(renderer, (
for key in ('contents', 'items'): ('contents', 'items', 'rows'), Ellipsis, 'continuationItemRenderer',
contents.extend(try_get(renderer, lambda x: x[key], list) or []) ('continuationEndpoint', ('button', 'buttonRenderer', 'command')),
for content in contents: (('commandExecutorCommand', 'commands', Ellipsis), None), T(dict))):
if not isinstance(content, dict): continuation = traverse_obj(command, ('continuationCommand', 'token', T(compat_str)))
continue
continuation_ep = try_get(
content, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
dict)
if not continuation_ep:
continue
continuation = try_get(
continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
if not continuation: if not continuation:
continue continue
ctp = continuation_ep.get('clickTrackingParams') ctp = command.get('clickTrackingParams')
return YoutubeTabIE._build_continuation_query(continuation, ctp) return cls._build_continuation_query(continuation, ctp)
def _entries(self, tab, item_id, webpage): def _entries(self, tab, item_id, webpage):
tab_content = try_get(tab, lambda x: x['content'], dict) tab_content = try_get(tab, lambda x: x['content'], dict)
@ -3428,6 +3613,13 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
entry = self._video_entry(renderer) entry = self._video_entry(renderer)
if entry: if entry:
yield entry yield entry
renderer = isr_content.get('richGridRenderer')
if renderer:
for from_ in self._rich_grid_entries(
traverse_obj(renderer, ('contents', Ellipsis, T(dict)))):
yield from_
continuation = self._extract_continuation(renderer)
continue
if not continuation: if not continuation:
continuation = self._extract_continuation(is_renderer) continuation = self._extract_continuation(is_renderer)
@ -3437,8 +3629,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
rich_grid_renderer = tab_content.get('richGridRenderer') rich_grid_renderer = tab_content.get('richGridRenderer')
if not rich_grid_renderer: if not rich_grid_renderer:
return return
for entry in self._rich_grid_entries(rich_grid_renderer.get('contents') or []): for from_ in self._rich_grid_entries(
yield entry traverse_obj(rich_grid_renderer, ('contents', Ellipsis, T(dict)))):
yield from_
continuation = self._extract_continuation(rich_grid_renderer) continuation = self._extract_continuation(rich_grid_renderer)
@ -3484,8 +3677,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
# Downloading page may result in intermittent 5xx HTTP error # Downloading page may result in intermittent 5xx HTTP error
# that is usually worked around with a retry # that is usually worked around with a retry
response = self._download_json( response = self._download_json(
'https://www.youtube.com/youtubei/v1/browse?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', 'https://www.youtube.com/youtubei/v1/browse',
None, 'Downloading page %d%s' % (page_num, ' (retry #%d)' % count if count else ''), None, 'Downloading page %d%s' % (page_num, ' (retry #%d)' % count if count else ''),
query={
# 'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
'prettyPrint': 'false',
},
headers=headers, data=json.dumps(data).encode('utf8')) headers=headers, data=json.dumps(data).encode('utf8'))
break break
except ExtractorError as e: except ExtractorError as e:

View file

@ -1,10 +1,12 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import calendar
import itertools import itertools
import json import json
import operator import operator
import re import re
import time
from functools import update_wrapper, wraps from functools import update_wrapper, wraps
@ -12,8 +14,10 @@ from .utils import (
error_to_compat_str, error_to_compat_str,
ExtractorError, ExtractorError,
float_or_none, float_or_none,
int_or_none,
js_to_json, js_to_json,
remove_quotes, remove_quotes,
str_or_none,
unified_timestamp, unified_timestamp,
variadic, variadic,
write_string, write_string,
@ -150,6 +154,7 @@ def _js_to_primitive(v):
) )
# more exact: yt-dlp/yt-dlp#12110
def _js_toString(v): def _js_toString(v):
return ( return (
'undefined' if v is JS_Undefined 'undefined' if v is JS_Undefined
@ -158,7 +163,7 @@ def _js_toString(v):
else 'null' if v is None else 'null' if v is None
# bool <= int: do this first # bool <= int: do this first
else ('false', 'true')[v] if isinstance(v, bool) else ('false', 'true')[v] if isinstance(v, bool)
else '{0:.7f}'.format(v).rstrip('.0') if isinstance(v, compat_numeric_types) else re.sub(r'(?<=\d)\.?0*$', '', '{0:.7f}'.format(v)) if isinstance(v, compat_numeric_types)
else _js_to_primitive(v)) else _js_to_primitive(v))
@ -235,7 +240,7 @@ def _js_ternary(cndn, if_true=True, if_false=False):
def _js_unary_op(op): def _js_unary_op(op):
@wraps_op(op) @wraps_op(op)
def wrapped(_, a): def wrapped(a, _):
return op(a) return op(a)
return wrapped return wrapped
@ -278,17 +283,6 @@ _OPERATORS = (
('**', _js_exp), ('**', _js_exp),
) )
_COMP_OPERATORS = (
('===', _js_id_op(operator.is_)),
('!==', _js_id_op(operator.is_not)),
('==', _js_eq),
('!=', _js_neq),
('<=', _js_comp_op(operator.le)),
('>=', _js_comp_op(operator.ge)),
('<', _js_comp_op(operator.lt)),
('>', _js_comp_op(operator.gt)),
)
_LOG_OPERATORS = ( _LOG_OPERATORS = (
('|', _js_bit_op(operator.or_)), ('|', _js_bit_op(operator.or_)),
('^', _js_bit_op(operator.xor)), ('^', _js_bit_op(operator.xor)),
@ -305,13 +299,27 @@ _SC_OPERATORS = (
_UNARY_OPERATORS_X = ( _UNARY_OPERATORS_X = (
('void', _js_unary_op(lambda _: JS_Undefined)), ('void', _js_unary_op(lambda _: JS_Undefined)),
('typeof', _js_unary_op(_js_typeof)), ('typeof', _js_unary_op(_js_typeof)),
# avoid functools.partial here since Py2 update_wrapper(partial) -> no __module__
('!', _js_unary_op(lambda x: _js_ternary(x, if_true=False, if_false=True))),
) )
_OPERATOR_RE = '|'.join(map(lambda x: re.escape(x[0]), _OPERATORS + _LOG_OPERATORS)) _COMP_OPERATORS = (
('===', _js_id_op(operator.is_)),
('!==', _js_id_op(operator.is_not)),
('==', _js_eq),
('!=', _js_neq),
('<=', _js_comp_op(operator.le)),
('>=', _js_comp_op(operator.ge)),
('<', _js_comp_op(operator.lt)),
('>', _js_comp_op(operator.gt)),
)
_OPERATOR_RE = '|'.join(map(lambda x: re.escape(x[0]), _OPERATORS + _LOG_OPERATORS + _SC_OPERATORS))
_NAME_RE = r'[a-zA-Z_$][\w$]*' _NAME_RE = r'[a-zA-Z_$][\w$]*'
_MATCHING_PARENS = dict(zip(*zip('()', '{}', '[]'))) _MATCHING_PARENS = dict(zip(*zip('()', '{}', '[]')))
_QUOTES = '\'"/' _QUOTES = '\'"/'
_NESTED_BRACKETS = r'[^[\]]+(?:\[[^[\]]+(?:\[[^\]]+\])?\])?'
class JS_Break(ExtractorError): class JS_Break(ExtractorError):
@ -348,7 +356,7 @@ class LocalNameSpace(ChainMap):
raise NotImplementedError('Deleting is not supported') raise NotImplementedError('Deleting is not supported')
def __repr__(self): def __repr__(self):
return 'LocalNameSpace%s' % (self.maps, ) return 'LocalNameSpace({0!r})'.format(self.maps)
class Debugger(object): class Debugger(object):
@ -369,6 +377,9 @@ class Debugger(object):
@classmethod @classmethod
def wrap_interpreter(cls, f): def wrap_interpreter(cls, f):
if not cls.ENABLED:
return f
@wraps(f) @wraps(f)
def interpret_statement(self, stmt, local_vars, allow_recursion, *args, **kwargs): def interpret_statement(self, stmt, local_vars, allow_recursion, *args, **kwargs):
if cls.ENABLED and stmt.strip(): if cls.ENABLED and stmt.strip():
@ -404,11 +415,22 @@ class JSInterpreter(object):
class Exception(ExtractorError): class Exception(ExtractorError):
def __init__(self, msg, *args, **kwargs): def __init__(self, msg, *args, **kwargs):
expr = kwargs.pop('expr', None) expr = kwargs.pop('expr', None)
msg = str_or_none(msg, default='"None"')
if expr is not None: if expr is not None:
msg = '{0} in: {1!r:.100}'.format(msg.rstrip(), expr) msg = '{0} in: {1!r:.100}'.format(msg.rstrip(), expr)
super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs) super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs)
class JS_RegExp(object): class JS_Object(object):
def __getitem__(self, key):
if hasattr(self, key):
return getattr(self, key)
raise KeyError(key)
def dump(self):
"""Serialise the instance"""
raise NotImplementedError
class JS_RegExp(JS_Object):
RE_FLAGS = { RE_FLAGS = {
# special knowledge: Python's re flags are bitmask values, current max 128 # special knowledge: Python's re flags are bitmask values, current max 128
# invent new bitmask values well above that for literal parsing # invent new bitmask values well above that for literal parsing
@ -429,15 +451,24 @@ class JSInterpreter(object):
def __init__(self, pattern_txt, flags=0): def __init__(self, pattern_txt, flags=0):
if isinstance(flags, compat_str): if isinstance(flags, compat_str):
flags, _ = self.regex_flags(flags) flags, _ = self.regex_flags(flags)
# First, avoid https://github.com/python/cpython/issues/74534
self.__self = None self.__self = None
self.__pattern_txt = pattern_txt.replace('[[', r'[\[') pattern_txt = str_or_none(pattern_txt) or '(?:)'
# escape unintended embedded flags
pattern_txt = re.sub(
r'(\(\?)([aiLmsux]*)(-[imsx]+:|(?<!\?)\))',
lambda m: ''.join(
(re.escape(m.group(1)), m.group(2), re.escape(m.group(3)))
if m.group(3) == ')'
else ('(?:', m.group(2), m.group(3))),
pattern_txt)
# Avoid https://github.com/python/cpython/issues/74534
self.source = pattern_txt.replace('[[', r'[\[')
self.__flags = flags self.__flags = flags
def __instantiate(self): def __instantiate(self):
if self.__self: if self.__self:
return return
self.__self = re.compile(self.__pattern_txt, self.__flags) self.__self = re.compile(self.source, self.__flags)
# Thx: https://stackoverflow.com/questions/44773522/setattr-on-python2-sre-sre-pattern # Thx: https://stackoverflow.com/questions/44773522/setattr-on-python2-sre-sre-pattern
for name in dir(self.__self): for name in dir(self.__self):
# Only these? Obviously __class__, __init__. # Only these? Obviously __class__, __init__.
@ -445,16 +476,15 @@ class JSInterpreter(object):
# that can't be setattr'd but also can't need to be copied. # that can't be setattr'd but also can't need to be copied.
if name in ('__class__', '__init__', '__weakref__'): if name in ('__class__', '__init__', '__weakref__'):
continue continue
setattr(self, name, getattr(self.__self, name)) if name == 'flags':
setattr(self, name, getattr(self.__self, name, self.__flags))
else:
setattr(self, name, getattr(self.__self, name))
def __getattr__(self, name): def __getattr__(self, name):
self.__instantiate() self.__instantiate()
# make Py 2.6 conform to its lying documentation if name == 'pattern':
if name == 'flags': self.pattern = self.source
self.flags = self.__flags
return self.flags
elif name == 'pattern':
self.pattern = self.__pattern_txt
return self.pattern return self.pattern
elif hasattr(self.__self, name): elif hasattr(self.__self, name):
v = getattr(self.__self, name) v = getattr(self.__self, name)
@ -462,6 +492,26 @@ class JSInterpreter(object):
return v return v
elif name in ('groupindex', 'groups'): elif name in ('groupindex', 'groups'):
return 0 if name == 'groupindex' else {} return 0 if name == 'groupindex' else {}
else:
flag_attrs = ( # order by 2nd elt
('hasIndices', 'd'),
('global', 'g'),
('ignoreCase', 'i'),
('multiline', 'm'),
('dotAll', 's'),
('unicode', 'u'),
('unicodeSets', 'v'),
('sticky', 'y'),
)
for k, c in flag_attrs:
if name == k:
return bool(self.RE_FLAGS[c] & self.__flags)
else:
if name == 'flags':
return ''.join(
(c if self.RE_FLAGS[c] & self.__flags else '')
for _, c in flag_attrs)
raise AttributeError('{0} has no attribute named {1}'.format(self, name)) raise AttributeError('{0} has no attribute named {1}'.format(self, name))
@classmethod @classmethod
@ -475,6 +525,85 @@ class JSInterpreter(object):
flags |= cls.RE_FLAGS[ch] flags |= cls.RE_FLAGS[ch]
return flags, expr[idx + 1:] return flags, expr[idx + 1:]
def dump(self):
return '(/{0}/{1})'.format(
re.sub(r'(?<!\\)/', r'\/', self.source),
self.flags)
@staticmethod
def escape(string_):
return re.escape(string_)
class JS_Date(JS_Object):
_t = None
@staticmethod
def __ymd_etc(*args, **kw_is_utc):
# args: year, monthIndex, day, hours, minutes, seconds, milliseconds
is_utc = kw_is_utc.get('is_utc', False)
args = list(args[:7])
args += [0] * (9 - len(args))
args[1] += 1 # month 0..11 -> 1..12
ms = args[6]
for i in range(6, 9):
args[i] = -1 # don't know
if is_utc:
args[-1] = 1
# TODO: [MDN] When a segment overflows or underflows its expected
# range, it usually "carries over to" or "borrows from" the higher segment.
try:
mktime = calendar.timegm if is_utc else time.mktime
return mktime(time.struct_time(args)) * 1000 + ms
except (OverflowError, ValueError):
return None
@classmethod
def UTC(cls, *args):
t = cls.__ymd_etc(*args, is_utc=True)
return _NaN if t is None else t
@staticmethod
def parse(date_str, **kw_is_raw):
is_raw = kw_is_raw.get('is_raw', False)
t = unified_timestamp(str_or_none(date_str), False)
return int(t * 1000) if t is not None else t if is_raw else _NaN
@staticmethod
def now(**kw_is_raw):
is_raw = kw_is_raw.get('is_raw', False)
t = time.time()
return int(t * 1000) if t is not None else t if is_raw else _NaN
def __init__(self, *args):
if not args:
args = [self.now(is_raw=True)]
if len(args) == 1:
if isinstance(args[0], JSInterpreter.JS_Date):
self._t = int_or_none(args[0].valueOf(), default=None)
else:
arg_type = _js_typeof(args[0])
if arg_type == 'string':
self._t = self.parse(args[0], is_raw=True)
elif arg_type == 'number':
self._t = int(args[0])
else:
self._t = self.__ymd_etc(*args)
def toString(self):
try:
return time.strftime('%a %b %0d %Y %H:%M:%S %Z%z', self._t).rstrip()
except TypeError:
return "Invalid Date"
def valueOf(self):
return _NaN if self._t is None else self._t
def dump(self):
return '(new Date({0}))'.format(self.toString())
@classmethod @classmethod
def __op_chars(cls): def __op_chars(cls):
op_chars = set(';,[') op_chars = set(';,[')
@ -578,59 +707,7 @@ class JSInterpreter(object):
_SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS, _UNARY_OPERATORS_X)) _SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS, _UNARY_OPERATORS_X))
return _cached return _cached
def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion): def _separate_at_op(self, expr, max_split=None):
if op in ('||', '&&'):
if (op == '&&') ^ _js_ternary(left_val):
return left_val # short circuiting
elif op == '??':
if left_val not in (None, JS_Undefined):
return left_val
elif op == '?':
right_expr = _js_ternary(left_val, *self._separate(right_expr, ':', 1))
right_val = self.interpret_expression(right_expr, local_vars, allow_recursion)
opfunc = op and next((v for k, v in self._all_operators() if k == op), None)
if not opfunc:
return right_val
try:
# print('Eval:', opfunc.__name__, left_val, right_val)
return opfunc(left_val, right_val)
except Exception as e:
raise self.Exception('Failed to evaluate {left_val!r:.50} {op} {right_val!r:.50}'.format(**locals()), expr, cause=e)
def _index(self, obj, idx, allow_undefined=True):
if idx == 'length' and isinstance(obj, list):
return len(obj)
try:
return obj[int(idx)] if isinstance(obj, list) else obj[compat_str(idx)]
except (TypeError, KeyError, IndexError) as e:
if allow_undefined:
# when is not allowed?
return JS_Undefined
raise self.Exception('Cannot get index {idx!r:.100}'.format(**locals()), expr=repr(obj), cause=e)
def _dump(self, obj, namespace):
try:
return json.dumps(obj)
except TypeError:
return self._named_object(namespace, obj)
# used below
_VAR_RET_THROW_RE = re.compile(r'''(?x)
(?:(?P<var>var|const|let)\s+|(?P<ret>return)(?:\s+|(?=["'])|$)|(?P<throw>throw)\s+)
''')
_COMPOUND_RE = re.compile(r'''(?x)
(?P<try>try)\s*\{|
(?P<if>if)\s*\(|
(?P<switch>switch)\s*\(|
(?P<for>for)\s*\(|
(?P<while>while)\s*\(
''')
_FINALLY_RE = re.compile(r'finally\s*\{')
_SWITCH_RE = re.compile(r'switch\s*\(')
def handle_operators(self, expr, local_vars, allow_recursion):
for op, _ in self._all_operators(): for op, _ in self._all_operators():
# hackety: </> have higher priority than <</>>, but don't confuse them # hackety: </> have higher priority than <</>>, but don't confuse them
@ -658,23 +735,98 @@ class JSInterpreter(object):
if separated[-1][-1:] in self.OP_CHARS: if separated[-1][-1:] in self.OP_CHARS:
right_expr = separated.pop() + right_expr right_expr = separated.pop() + right_expr
# hanging op at end of left => unary + (strip) or - (push right) # hanging op at end of left => unary + (strip) or - (push right)
left_val = separated[-1] if separated else '' separated.append(right_expr)
for dm_op in ('*', '%', '/', '**'): dm_ops = ('*', '%', '/', '**')
bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim)) dm_chars = set(''.join(dm_ops))
if len(bodmas) > 1 and not bodmas[-1].strip():
expr = op.join(separated) + op + right_expr
if len(separated) > 1:
separated.pop()
right_expr = op.join((left_val, right_expr))
else:
separated = [op.join((left_val, right_expr))]
right_expr = None
break
if right_expr is None:
continue
left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion) def yield_terms(s):
return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), True skip = False
for i, term in enumerate(s[:-1]):
if skip:
skip = False
continue
if not (dm_chars & set(term)):
yield term
continue
for dm_op in dm_ops:
bodmas = list(self._separate(term, dm_op, skip_delims=skip_delim))
if len(bodmas) > 1 and not bodmas[-1].strip():
bodmas[-1] = (op if op == '-' else '') + s[i + 1]
yield dm_op.join(bodmas)
skip = True
break
else:
if term:
yield term
if not skip and s[-1]:
yield s[-1]
separated = list(yield_terms(separated))
right_expr = separated.pop() if len(separated) > 1 else None
expr = op.join(separated)
if right_expr is None:
continue
return op, separated, right_expr
def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
if op in ('||', '&&'):
if (op == '&&') ^ _js_ternary(left_val):
return left_val # short circuiting
elif op == '??':
if left_val not in (None, JS_Undefined):
return left_val
elif op == '?':
right_expr = _js_ternary(left_val, *self._separate(right_expr, ':', 1))
right_val = self.interpret_expression(right_expr, local_vars, allow_recursion) if right_expr else left_val
opfunc = op and next((v for k, v in self._all_operators() if k == op), None)
if not opfunc:
return right_val
try:
# print('Eval:', opfunc.__name__, left_val, right_val)
return opfunc(left_val, right_val)
except Exception as e:
raise self.Exception('Failed to evaluate {left_val!r:.50} {op} {right_val!r:.50}'.format(**locals()), expr, cause=e)
def _index(self, obj, idx, allow_undefined=None):
if idx == 'length' and isinstance(obj, list):
return len(obj)
try:
return obj[int(idx)] if isinstance(obj, list) else obj[compat_str(idx)]
except (TypeError, KeyError, IndexError, ValueError) as e:
# allow_undefined is None gives correct behaviour
if allow_undefined or (
allow_undefined is None and not isinstance(e, TypeError)):
return JS_Undefined
raise self.Exception('Cannot get index {idx!r:.100}'.format(**locals()), expr=repr(obj), cause=e)
def _dump(self, obj, namespace):
if obj is JS_Undefined:
return 'undefined'
try:
return json.dumps(obj)
except TypeError:
return self._named_object(namespace, obj)
# used below
_VAR_RET_THROW_RE = re.compile(r'''(?x)
(?:(?P<var>var|const|let)\s+|(?P<ret>return)(?:\s+|(?=["'])|$)|(?P<throw>throw)\s+)
''')
_COMPOUND_RE = re.compile(r'''(?x)
(?P<try>try)\s*\{|
(?P<if>if)\s*\(|
(?P<switch>switch)\s*\(|
(?P<for>for)\s*\(|
(?P<while>while)\s*\(
''')
_FINALLY_RE = re.compile(r'finally\s*\{')
_SWITCH_RE = re.compile(r'switch\s*\(')
def _eval_operator(self, op, left_expr, right_expr, expr, local_vars, allow_recursion):
left_val = self.interpret_expression(left_expr, local_vars, allow_recursion)
return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion)
@Debugger.wrap_interpreter @Debugger.wrap_interpreter
def interpret_statement(self, stmt, local_vars, allow_recursion=100): def interpret_statement(self, stmt, local_vars, allow_recursion=100):
@ -715,7 +867,7 @@ class JSInterpreter(object):
new_kw, _, obj = expr.partition('new ') new_kw, _, obj = expr.partition('new ')
if not new_kw: if not new_kw:
for klass, konstr in (('Date', lambda x: int(unified_timestamp(x, False) * 1000)), for klass, konstr in (('Date', lambda *x: self.JS_Date(*x).valueOf()),
('RegExp', self.JS_RegExp), ('RegExp', self.JS_RegExp),
('Error', self.Exception)): ('Error', self.Exception)):
if not obj.startswith(klass + '('): if not obj.startswith(klass + '('):
@ -730,15 +882,19 @@ class JSInterpreter(object):
else: else:
raise self.Exception('Unsupported object {obj:.100}'.format(**locals()), expr=expr) raise self.Exception('Unsupported object {obj:.100}'.format(**locals()), expr=expr)
# apply unary operators (see new above)
for op, _ in _UNARY_OPERATORS_X: for op, _ in _UNARY_OPERATORS_X:
if not expr.startswith(op): if not expr.startswith(op):
continue continue
operand = expr[len(op):] operand = expr[len(op):]
if not operand or operand[0] != ' ': if not operand or (op.isalpha() and operand[0] != ' '):
continue continue
op_result = self.handle_operators(expr, local_vars, allow_recursion) separated = self._separate_at_op(operand, max_split=1)
if op_result: if separated:
return op_result[0], should_return next_op, separated, right_expr = separated
separated.append(right_expr)
operand = next_op.join(separated)
return self._eval_operator(op, operand, '', expr, local_vars, allow_recursion), should_return
if expr.startswith('{'): if expr.startswith('{'):
inner, outer = self._separate_at_paren(expr) inner, outer = self._separate_at_paren(expr)
@ -933,15 +1089,18 @@ class JSInterpreter(object):
m = re.match(r'''(?x) m = re.match(r'''(?x)
(?P<assign> (?P<assign>
(?P<out>{_NAME_RE})(?:\[(?P<out_idx>(?:.+?\]\s*\[)*.+?)\])?\s* (?P<out>{_NAME_RE})(?P<out_idx>(?:\[{_NESTED_BRACKETS}\])+)?\s*
(?P<op>{_OPERATOR_RE})? (?P<op>{_OPERATOR_RE})?
=(?!=)(?P<expr>.*)$ =(?!=)(?P<expr>.*)$
)|(?P<return> )|(?P<return>
(?!if|return|true|false|null|undefined|NaN|Infinity)(?P<name>{_NAME_RE})$ (?!if|return|true|false|null|undefined|NaN|Infinity)(?P<name>{_NAME_RE})$
)|(?P<indexing>
(?P<in>{_NAME_RE})\[(?P<in_idx>(?:.+?\]\s*\[)*.+?)\]$
)|(?P<attribute> )|(?P<attribute>
(?P<var>{_NAME_RE})(?:(?P<nullish>\?)?\.(?P<member>[^(]+)|\[(?P<member2>[^\]]+)\])\s* (?P<var>{_NAME_RE})(?:
(?P<nullish>\?)?\.(?P<member>[^(]+)|
\[(?P<member2>{_NESTED_BRACKETS})\]
)\s*
)|(?P<indexing>
(?P<in>{_NAME_RE})(?P<in_idx>\[.+\])$
)|(?P<function> )|(?P<function>
(?P<fname>{_NAME_RE})\((?P<args>.*)\)$ (?P<fname>{_NAME_RE})\((?P<args>.*)\)$
)'''.format(**globals()), expr) )'''.format(**globals()), expr)
@ -956,13 +1115,18 @@ class JSInterpreter(object):
elif left_val in (None, JS_Undefined): elif left_val in (None, JS_Undefined):
raise self.Exception('Cannot index undefined variable ' + m.group('out'), expr=expr) raise self.Exception('Cannot index undefined variable ' + m.group('out'), expr=expr)
indexes = re.split(r'\]\s*\[', m.group('out_idx')) indexes = md['out_idx']
for i, idx in enumerate(indexes, 1): while indexes:
idx, indexes = self._separate_at_paren(indexes)
idx = self.interpret_expression(idx, local_vars, allow_recursion) idx = self.interpret_expression(idx, local_vars, allow_recursion)
if i < len(indexes): if indexes:
left_val = self._index(left_val, idx) left_val = self._index(left_val, idx)
if isinstance(idx, float): if isinstance(idx, float):
idx = int(idx) idx = int(idx)
if isinstance(left_val, list) and len(left_val) <= int_or_none(idx, default=-1):
# JS Array is a sparsely assignable list
# TODO: handle extreme sparsity without memory bloat, eg using auxiliary dict
left_val.extend((idx - len(left_val) + 1) * [JS_Undefined])
left_val[idx] = self._operator( left_val[idx] = self._operator(
m.group('op'), self._index(left_val, idx) if m.group('op') else None, m.group('op'), self._index(left_val, idx) if m.group('op') else None,
m.group('expr'), expr, local_vars, allow_recursion) m.group('expr'), expr, local_vars, allow_recursion)
@ -1000,14 +1164,17 @@ class JSInterpreter(object):
if md.get('indexing'): if md.get('indexing'):
val = local_vars[m.group('in')] val = local_vars[m.group('in')]
for idx in re.split(r'\]\s*\[', m.group('in_idx')): indexes = m.group('in_idx')
while indexes:
idx, indexes = self._separate_at_paren(indexes)
idx = self.interpret_expression(idx, local_vars, allow_recursion) idx = self.interpret_expression(idx, local_vars, allow_recursion)
val = self._index(val, idx) val = self._index(val, idx)
return val, should_return return val, should_return
op_result = self.handle_operators(expr, local_vars, allow_recursion) separated = self._separate_at_op(expr)
if op_result: if separated:
return op_result[0], should_return op, separated, right_expr = separated
return self._eval_operator(op, op.join(separated), right_expr, expr, local_vars, allow_recursion), should_return
if md.get('attribute'): if md.get('attribute'):
variable, member, nullish = m.group('var', 'member', 'nullish') variable, member, nullish = m.group('var', 'member', 'nullish')
@ -1028,12 +1195,15 @@ class JSInterpreter(object):
def eval_method(variable, member): def eval_method(variable, member):
if (variable, member) == ('console', 'debug'): if (variable, member) == ('console', 'debug'):
if Debugger.ENABLED: if Debugger.ENABLED:
Debugger.write(self.interpret_expression('[{}]'.format(arg_str), local_vars, allow_recursion)) Debugger.write(self.interpret_expression('[{0}]'.format(arg_str), local_vars, allow_recursion))
return return
types = { types = {
'String': compat_str, 'String': compat_str,
'Math': float, 'Math': float,
'Array': list, 'Array': list,
'Date': self.JS_Date,
'RegExp': self.JS_RegExp,
# 'Error': self.Exception, # has no std static methods
} }
obj = local_vars.get(variable) obj = local_vars.get(variable)
if obj in (JS_Undefined, None): if obj in (JS_Undefined, None):
@ -1041,7 +1211,7 @@ class JSInterpreter(object):
if obj is JS_Undefined: if obj is JS_Undefined:
try: try:
if variable not in self._objects: if variable not in self._objects:
self._objects[variable] = self.extract_object(variable) self._objects[variable] = self.extract_object(variable, local_vars)
obj = self._objects[variable] obj = self._objects[variable]
except self.Exception: except self.Exception:
if not nullish: if not nullish:
@ -1052,7 +1222,7 @@ class JSInterpreter(object):
# Member access # Member access
if arg_str is None: if arg_str is None:
return self._index(obj, member) return self._index(obj, member, nullish)
# Function call # Function call
argvals = [ argvals = [
@ -1086,6 +1256,8 @@ class JSInterpreter(object):
assertion(len(argvals) == 2, 'takes two arguments') assertion(len(argvals) == 2, 'takes two arguments')
return argvals[0] ** argvals[1] return argvals[0] ** argvals[1]
raise self.Exception('Unsupported Math method ' + member, expr=expr) raise self.Exception('Unsupported Math method ' + member, expr=expr)
elif obj is self.JS_Date:
return getattr(obj, member)(*argvals)
if member == 'split': if member == 'split':
assertion(len(argvals) <= 2, 'takes at most two arguments') assertion(len(argvals) <= 2, 'takes at most two arguments')
@ -1126,9 +1298,10 @@ class JSInterpreter(object):
elif member == 'join': elif member == 'join':
assertion(isinstance(obj, list), 'must be applied on a list') assertion(isinstance(obj, list), 'must be applied on a list')
assertion(len(argvals) <= 1, 'takes at most one argument') assertion(len(argvals) <= 1, 'takes at most one argument')
return (',' if len(argvals) == 0 else argvals[0]).join( return (',' if len(argvals) == 0 or argvals[0] in (None, JS_Undefined)
('' if x in (None, JS_Undefined) else _js_toString(x)) else argvals[0]).join(
for x in obj) ('' if x in (None, JS_Undefined) else _js_toString(x))
for x in obj)
elif member == 'reverse': elif member == 'reverse':
assertion(not argvals, 'does not take any arguments') assertion(not argvals, 'does not take any arguments')
obj.reverse() obj.reverse()
@ -1192,7 +1365,8 @@ class JSInterpreter(object):
assertion(len(argvals) == 2, 'takes exactly two arguments') assertion(len(argvals) == 2, 'takes exactly two arguments')
# TODO: argvals[1] callable, other Py vs JS edge cases # TODO: argvals[1] callable, other Py vs JS edge cases
if isinstance(argvals[0], self.JS_RegExp): if isinstance(argvals[0], self.JS_RegExp):
count = 0 if argvals[0].flags & self.JS_RegExp.RE_FLAGS['g'] else 1 # access JS member with Py reserved name
count = 0 if self._index(argvals[0], 'global') else 1
assertion(member != 'replaceAll' or count == 0, assertion(member != 'replaceAll' or count == 0,
'replaceAll must be called with a global RegExp') 'replaceAll must be called with a global RegExp')
return argvals[0].sub(argvals[1], obj, count=count) return argvals[0].sub(argvals[1], obj, count=count)
@ -1233,7 +1407,7 @@ class JSInterpreter(object):
for v in self._separate(list_txt): for v in self._separate(list_txt):
yield self.interpret_expression(v, local_vars, allow_recursion) yield self.interpret_expression(v, local_vars, allow_recursion)
def extract_object(self, objname): def extract_object(self, objname, *global_stack):
_FUNC_NAME_RE = r'''(?:{n}|"{n}"|'{n}')'''.format(n=_NAME_RE) _FUNC_NAME_RE = r'''(?:{n}|"{n}"|'{n}')'''.format(n=_NAME_RE)
obj = {} obj = {}
fields = next(filter(None, ( fields = next(filter(None, (
@ -1254,7 +1428,8 @@ class JSInterpreter(object):
fields): fields):
argnames = self.build_arglist(f.group('args')) argnames = self.build_arglist(f.group('args'))
name = remove_quotes(f.group('key')) name = remove_quotes(f.group('key'))
obj[name] = function_with_repr(self.build_function(argnames, f.group('code')), 'F<{0}>'.format(name)) obj[name] = function_with_repr(
self.build_function(argnames, f.group('code'), *global_stack), 'F<{0}>'.format(name))
return obj return obj
@ -1286,19 +1461,21 @@ class JSInterpreter(object):
code, _ = self._separate_at_paren(func_m.group('code')) # refine the match code, _ = self._separate_at_paren(func_m.group('code')) # refine the match
return self.build_arglist(func_m.group('args')), code return self.build_arglist(func_m.group('args')), code
def extract_function(self, funcname, *global_stack):
    """
    Build a callable for the JS function named `funcname`.

    The (argnames, code) pair returned by extract_function_code() is
    chained with any extra positional arguments and the whole sequence is
    forwarded to extract_function_from_code(argnames, code, *global_stack).

    @param funcname      name of the JS function to look up
    @param global_stack  optional extra scopes, passed through unchanged
    @return              the built function wrapped by function_with_repr,
                         with the repr 'F<funcname>'
    """
    return function_with_repr(
        self.extract_function_from_code(*itertools.chain(
            self.extract_function_code(funcname), global_stack)),
        'F<%s>' % (funcname,))
def extract_function_from_code(self, argnames, code, *global_stack): def extract_function_from_code(self, argnames, code, *global_stack):
local_vars = {} local_vars = {}
start = None
while True: while True:
mobj = re.search(r'function\((?P<args>[^)]*)\)\s*{', code) mobj = re.search(r'function\((?P<args>[^)]*)\)\s*{', code[start:])
if mobj is None: if mobj is None:
break break
start, body_start = mobj.span() start, body_start = ((start or 0) + x for x in mobj.span())
body, remaining = self._separate_at_paren(code[body_start - 1:]) body, remaining = self._separate_at_paren(code[body_start - 1:])
name = self._named_object(local_vars, self.extract_function_from_code( name = self._named_object(local_vars, self.extract_function_from_code(
[x.strip() for x in mobj.group('args').split(',')], [x.strip() for x in mobj.group('args').split(',')],

View file

@ -814,6 +814,11 @@ def parseOpts(overrideArguments=None):
'--no-post-overwrites', '--no-post-overwrites',
action='store_true', dest='nopostoverwrites', default=False, action='store_true', dest='nopostoverwrites', default=False,
help='Do not overwrite post-processed files; the post-processed files are overwritten by default') help='Do not overwrite post-processed files; the post-processed files are overwritten by default')
postproc.add_option(
'--aac-to-mp3',
action='store_true', dest='aacToMp3', default=False,
help='Convert AAC files to MP3',
)
postproc.add_option( postproc.add_option(
'--embed-subs', '--embed-subs',
action='store_true', dest='embedsubtitles', default=False, action='store_true', dest='embedsubtitles', default=False,

View file

@ -2,6 +2,7 @@ from __future__ import unicode_literals
from .embedthumbnail import EmbedThumbnailPP from .embedthumbnail import EmbedThumbnailPP
from .ffmpeg import ( from .ffmpeg import (
ConvertAACToMP3PP,
FFmpegPostProcessor, FFmpegPostProcessor,
FFmpegEmbedSubtitlePP, FFmpegEmbedSubtitlePP,
FFmpegExtractAudioPP, FFmpegExtractAudioPP,
@ -23,6 +24,7 @@ def get_postprocessor(key):
__all__ = [ __all__ = [
'ConvertAACToMP3PP',
'EmbedThumbnailPP', 'EmbedThumbnailPP',
'ExecAfterDownloadPP', 'ExecAfterDownloadPP',
'FFmpegEmbedSubtitlePP', 'FFmpegEmbedSubtitlePP',

View file

@ -2,6 +2,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import logging
import os import os
import subprocess import subprocess
@ -21,6 +22,9 @@ from ..utils import (
from ..compat import compat_open as open from ..compat import compat_open as open
logger = logging.getLogger('soundcloudutil.downloader')
class EmbedThumbnailPPError(PostProcessingError): class EmbedThumbnailPPError(PostProcessingError):
pass pass
@ -128,6 +132,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
os.remove(encodeFilename(filename)) os.remove(encodeFilename(filename))
os.rename(encodeFilename(temp_filename), encodeFilename(filename)) os.rename(encodeFilename(temp_filename), encodeFilename(filename))
else: else:
raise EmbedThumbnailPPError('Only mp3 and m4a/mp4 are supported for thumbnail embedding for now.') logger.warning('Only mp3 and m4a/mp4 are supported for thumbnail embedding for now.')
# raise EmbedThumbnailPPError('Only mp3 and m4a/mp4 are supported for thumbnail embedding for now.')
return [], info return [], info

View file

@ -4,7 +4,8 @@ import os
import subprocess import subprocess
import time import time
import re import re
from pathlib import Path
from typing import Any
from .common import AudioConversionError, PostProcessor from .common import AudioConversionError, PostProcessor
@ -651,3 +652,26 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
} }
return sub_filenames, info return sub_filenames, info
class ConvertAACToMP3PP(FFmpegPostProcessor):
    """
    Custom post processor that converts .aac files to .mp3 files

    Downloads whose 'ext' is anything other than 'aac' are passed through
    untouched.
    """

    def run(self, info):
        """
        Convert the downloaded file to MP3 if it is an AAC file.

        The signature is intentionally unannotated: subscripted builtin
        generics (``dict[str, Any]``, ``list[str]``) are evaluated when the
        ``def`` statement executes and raise TypeError on Python < 3.9, and
        annotated local variables are a syntax error before Python 3.6.

        @param info  dict describing the download; 'ext' and 'filepath' are
                     read and, after a conversion, rewritten in place to
                     refer to the new .mp3 file
        @return      tuple ([], info); the list is always empty because the
                     source .aac file is removed here directly
        """
        # Nothing to do for non-AAC downloads (or when 'ext' is absent)
        if info.get('ext') != 'aac':
            return [], info

        aac_path = Path(info['filepath'])
        mp3_path = aac_path.with_suffix('.mp3')
        self._downloader.to_screen('[ffmpeg] Converting .aac to .mp3')
        options = [
            '-codec:a', 'libmp3lame',
            '-qscale:a', '0',
        ]
        self.run_ffmpeg(str(aac_path), str(mp3_path), options)

        # Only reached when run_ffmpeg() returned without raising, so the
        # source is never removed after a failed conversion.
        # NOTE(review): the file is unlinked here rather than being reported
        # through the returned list - confirm that this is intended.
        aac_path.unlink()
        info['filepath'] = str(mp3_path)
        info['ext'] = 'mp3'
        return [], info

View file

@ -4204,12 +4204,16 @@ def lowercase_escape(s):
s) s)
def escape_rfc3986(s, safe=None):
    """Escape non-ASCII characters as suggested by RFC 3986

    @param s     the string to escape
    @param safe  characters to leave unquoted; when None (the default) the
                 set %/;:@&=+$,!~*'()?#[] is used
    @return      the quoted string, as compat_str
    """
    if sys.version_info < (3, 0):
        # Python 2: hand quote() encoded input for both arguments
        s = _encode_compat_str(s, 'utf-8')
        if safe is not None:
            safe = _encode_compat_str(safe, 'utf-8')
    if safe is None:
        safe = b"%/;:@&=+$,!~*'()?#[]"
    # ensure unicode: after quoting, it can always be converted
    return compat_str(compat_urllib_parse.quote(s, safe))
def escape_url(url): def escape_url(url):

View file

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2021.12.17' __version__ = '2025.04.07'