From 0b2889705574620ac2d7d556956693bb4eb0a881 Mon Sep 17 00:00:00 2001 From: Youssef Hajjioui Date: Mon, 24 Jan 2022 10:03:38 -0500 Subject: [PATCH] Fix for HTTP Error 403 Forbidden This fix mainly targets downloading playlists from YouTube Music. Each time the error is raised, the position in the playlist at which the download was interrupted is stored, and the download is launched again from that position. This is done through recursion in the wrapper function within the function `__handle_extraction_exceptions`. --- youtube_dl/YoutubeDL.py | 11 +++++++++++ youtube_dl/__init__.py | 1 + youtube_dl/compat.py | 6 ++++++ youtube_dl/downloader/http.py | 5 ++++- 4 files changed, 22 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index fe30758ef..4a3cefb3c 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -41,6 +41,7 @@ from .compat import ( compat_urllib_error, compat_urllib_request, compat_urllib_request_DataHandler, + Forbidden403 ) from .utils import ( age_restricted, @@ -112,6 +113,7 @@ from .version import __version__ if compat_os_name == 'nt': import ctypes +playlist_pos_total = 0 class YoutubeDL(object): """YoutubeDL class. @@ -361,6 +363,7 @@ class YoutubeDL(object): } self.params.update(params) self.cache = Cache(self) + self.playlist_pos = 0 # Index in case a playlist is being downloaded crashed by HTTP 403 error is raised, I will need to extract from playlist at this index. def check_deprecated(param, option, suggestion): if self.params.get(param) is not None: @@ -824,6 +827,12 @@ class YoutubeDL(object): self.report_error(compat_str(e), e.format_traceback()) except MaxDownloadsReached: raise + except Forbidden403 as err: + # Update the pos to start in playlist to the one at which the error occurred. + global playlist_pos_total + playlist_pos_total += err.playlist_pos + self.params['playliststart'] = playlist_pos_total + wrapper(self, *args, **kwargs) # Execute the wrapper function. 
except Exception as e: if self.params.get('ignoreerrors', False): self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc())) @@ -1040,6 +1049,7 @@ class YoutubeDL(object): x_forwarded_for = ie_result.get('__x_forwarded_for_ip') for i, entry in enumerate(entries, 1): + self.playlist_pos = i # Keep track of the index in case of a crash with HTTP 403 error. self.to_screen('[download] Downloading video %s of %s' % (i, n_entries)) # This __x_forwarded_for_ip thing is a bit ugly but requires # minimal changes @@ -1775,6 +1785,7 @@ class YoutubeDL(object): def process_info(self, info_dict): """Process a single resolved IE result.""" + info_dict['playlist_pos'] = self.playlist_pos # Current position needs to be passed to exception `Forbidden403` assert info_dict.get('_type', 'video') == 'video' diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index e1bd67919..fcd619373 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -481,4 +481,5 @@ def main(argv=None): sys.exit('\nERROR: Interrupted by user') + __all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors'] diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 9e45c454b..6d5df3f74 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2997,6 +2997,12 @@ else: def compat_ctypes_WINFUNCTYPE(*args, **kwargs): return ctypes.WINFUNCTYPE(*args, **kwargs) +# Implement an error for which it is raise when error code 403 HTTPError +class Forbidden403(Exception): + def __init__(self, playlist_pos): + self.playlist_pos = playlist_pos + def __str__(self): + return "Downloaded playlist has stopped at %d" % self.playlist_pos __all__ = [ 'compat_HTMLParseError', diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index d8ac41dcc..6254beada 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -11,6 +11,7 @@ from .common import FileDownloader from ..compat import ( compat_str, compat_urllib_error, + 
Forbidden403 ) from ..utils import ( ContentTooShortError, @@ -183,9 +184,11 @@ class HttpFD(FileDownloader): ctx.resume_len = 0 ctx.open_mode = 'wb' return - elif err.code < 500 or err.code >= 600: + elif err.code != 403 and (err.code < 500 or err.code >= 600): # Unexpected HTTP error raise + elif err.code == 403: # The famous 403 Forbidden error + raise Forbidden403(int(info_dict.get("playlist_pos"))) raise RetryDownload(err) except socket.error as err: if err.errno != errno.ECONNRESET: