[utils] Rework URL path munging for ., .. components

* move processing to YoutubeDLHandler
* also process `Location` header for redirect
* use tests from https://github.com/yt-dlp/yt-dlp/pull/7662
This commit is contained in:
dirkf 2023-07-28 06:19:15 +01:00
commit abef53466d
4 changed files with 95 additions and 41 deletions

View file

@ -71,7 +71,6 @@ from .utils import (
format_bytes,
formatSeconds,
GeoRestrictedError,
HEADRequest,
int_or_none,
ISO3166Utils,
join_nonempty,
@ -88,7 +87,6 @@ from .utils import (
preferredencoding,
prepend_extension,
process_communicate_or_kill,
PUTRequest,
register_socks_protocols,
render_table,
replace_extension,
@ -2460,27 +2458,6 @@ class YoutubeDL(object):
""" Start an HTTP download """
if isinstance(req, compat_basestring):
req = sanitized_Request(req)
# an embedded /../ sequence is not automatically handled by urllib2
# see https://github.com/yt-dlp/yt-dlp/issues/3355
url = req.get_full_url()
parts = url.partition('/../')
if parts[1]:
url = compat_urllib_parse.urljoin(parts[0] + parts[1][:1], parts[1][1:] + parts[2])
if url:
# worse, URL path may have initial /../ against RFCs: work-around
# by stripping such prefixes, like eg Firefox
parts = compat_urllib_parse.urlsplit(url)
path = parts.path
while path.startswith('/../'):
path = path[3:]
url = parts._replace(path=path).geturl()
# get a new Request with the munged URL
if url != req.get_full_url():
req_type = {'HEAD': HEADRequest, 'PUT': PUTRequest}.get(
req.get_method(), compat_urllib_request.Request)
req = req_type(
url, data=req.data, headers=dict(req.header_items()),
origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
return self._opener.open(req, timeout=self._socket_timeout)
def print_debug_header(self):