[jsinterp] Overhaul JSInterp to handle new YT players 4c3f79c5, 324f67b9 (#31170)

* back-port from yt-dlp 8f53dc44a0cc1c2d98c35740b9293462c080f5d0, thanks pukkandan
* also support void, improve <</>> precedence, improve expressions in comma-list
* add more tests
This commit is contained in:
dirkf 2022-08-14 18:45:45 +01:00 committed by GitHub
parent e6a836d54c
commit d231b56717
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 500 additions and 247 deletions

View file

@ -1696,6 +1696,17 @@ MONTH_NAMES = {
'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}
# Timezone names for RFC2822 obs-zone, mapped to their whole-hour offset
# from UTC (extract_timezone() feeds the value to timedelta(hours=...)).
# From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
TIMEZONE_NAMES = {
    # Universal time and its aliases
    'UT': 0,
    'UTC': 0,
    'GMT': 0,
    'Z': 0,
    # Atlantic (used in Canada)
    'AST': -4,
    'ADT': -3,
    # Eastern
    'EST': -5,
    'EDT': -4,
    # Central
    'CST': -6,
    'CDT': -5,
    # Mountain
    'MST': -7,
    'MDT': -6,
    # Pacific
    'PST': -8,
    'PDT': -7,
}
KNOWN_EXTENSIONS = (
'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
'flv', 'f4v', 'f4a', 'f4b',
@ -1735,12 +1746,17 @@ DATE_FORMATS = (
'%b %dth %Y %I:%M',
'%Y %m %d',
'%Y-%m-%d',
'%Y.%m.%d.',
'%Y/%m/%d',
'%Y/%m/%d %H:%M',
'%Y/%m/%d %H:%M:%S',
'%Y%m%d%H%M',
'%Y%m%d%H%M%S',
'%Y%m%d',
'%Y-%m-%d %H:%M',
'%Y-%m-%d %H:%M:%S',
'%Y-%m-%d %H:%M:%S.%f',
'%Y-%m-%d %H:%M:%S:%f',
'%d.%m.%Y %H:%M',
'%d.%m.%Y %H.%M',
'%Y-%m-%dT%H:%M:%SZ',
@ -1753,6 +1769,7 @@ DATE_FORMATS = (
'%b %d %Y at %H:%M:%S',
'%B %d %Y at %H:%M',
'%B %d %Y at %H:%M:%S',
'%H:%M %d-%b-%Y',
)
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
@ -1763,6 +1780,7 @@ DATE_FORMATS_DAY_FIRST.extend([
'%d/%m/%Y',
'%d/%m/%y',
'%d/%m/%Y %H:%M:%S',
'%d-%m-%Y %H:%M',
])
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
@ -2966,10 +2984,22 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
def extract_timezone(date_str):
m = re.search(
r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
date_str)
r'''(?x)
^.{8,}? # >=8 char non-TZ prefix, if present
(?P<tz>Z| # just the UTC Z, or
(?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
(?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
[ ]? # optional space
(?P<sign>\+|-) # +/-
(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
$)
''', date_str)
if not m:
timezone = datetime.timedelta()
m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip())
if timezone is not None:
date_str = date_str[:-len(m.group('tz'))]
timezone = datetime.timedelta(hours=timezone or 0)
else:
date_str = date_str[:-len(m.group('tz'))]
if not m.group('sign'):
@ -3037,7 +3067,8 @@ def unified_timestamp(date_str, day_first=True):
if date_str is None:
return None
date_str = re.sub(r'[,|]', '', date_str)
date_str = re.sub(r'\s+', ' ', re.sub(
r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str))
pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
timezone, date_str = extract_timezone(date_str)
@ -3063,7 +3094,7 @@ def unified_timestamp(date_str, day_first=True):
pass
timetuple = email.utils.parsedate_tz(date_str)
if timetuple:
return calendar.timegm(timetuple) + pm_delta * 3600
return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
def determine_ext(url, default_ext='unknown_video'):
@ -3673,13 +3704,11 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
if get_attr:
if v is not None:
v = getattr(v, get_attr, None)
if v == '':
v = None
if v is None:
if v in (None, ''):
return default
try:
return int(v) * invscale // scale
except (ValueError, TypeError):
except (ValueError, TypeError, OverflowError):
return default