Fix for HTTP Error 403 Forbidden

This fix is mainly intended for downloading playlists from YouTube
Music.
Each time the error is raised, the position in the playlist at which
the download was interrupted is stored, and the download is then
launched again from that position.
It does this by recursively calling the wrapper function defined
inside `__handle_extraction_exceptions`.
This commit is contained in:
Youssef Hajjioui 2022-01-24 10:03:38 -05:00
commit 0b28897055
4 changed files with 22 additions and 1 deletions

View file

@ -41,6 +41,7 @@ from .compat import (
compat_urllib_error, compat_urllib_error,
compat_urllib_request, compat_urllib_request,
compat_urllib_request_DataHandler, compat_urllib_request_DataHandler,
Forbidden403
) )
from .utils import ( from .utils import (
age_restricted, age_restricted,
@ -112,6 +113,7 @@ from .version import __version__
if compat_os_name == 'nt': if compat_os_name == 'nt':
import ctypes import ctypes
playlist_pos_total = 0
class YoutubeDL(object): class YoutubeDL(object):
"""YoutubeDL class. """YoutubeDL class.
@ -361,6 +363,7 @@ class YoutubeDL(object):
} }
self.params.update(params) self.params.update(params)
self.cache = Cache(self) self.cache = Cache(self)
self.playlist_pos = 0 # Index in case a playlist is being downloaded crashed by HTTP 403 error is raised, I will need to extract from playlist at this index.
def check_deprecated(param, option, suggestion): def check_deprecated(param, option, suggestion):
if self.params.get(param) is not None: if self.params.get(param) is not None:
@ -824,6 +827,12 @@ class YoutubeDL(object):
self.report_error(compat_str(e), e.format_traceback()) self.report_error(compat_str(e), e.format_traceback())
except MaxDownloadsReached: except MaxDownloadsReached:
raise raise
except Forbidden403 as err:
# Update the pos to start in playlist to the one at which the error occurred.
global playlist_pos_total
playlist_pos_total += err.playlist_pos
self.params['playliststart'] = playlist_pos_total
wrapper(self, *args, **kwargs) # Execute the wrapper function.
except Exception as e: except Exception as e:
if self.params.get('ignoreerrors', False): if self.params.get('ignoreerrors', False):
self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc())) self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
@ -1040,6 +1049,7 @@ class YoutubeDL(object):
x_forwarded_for = ie_result.get('__x_forwarded_for_ip') x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
for i, entry in enumerate(entries, 1): for i, entry in enumerate(entries, 1):
self.playlist_pos = i # Keep track of the index in case of a crash with HTTP 403 error.
self.to_screen('[download] Downloading video %s of %s' % (i, n_entries)) self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
# This __x_forwarded_for_ip thing is a bit ugly but requires # This __x_forwarded_for_ip thing is a bit ugly but requires
# minimal changes # minimal changes
@ -1775,6 +1785,7 @@ class YoutubeDL(object):
def process_info(self, info_dict): def process_info(self, info_dict):
"""Process a single resolved IE result.""" """Process a single resolved IE result."""
info_dict['playlist_pos'] = self.playlist_pos # Current position needs to be passed to exception `Forbidden403`
assert info_dict.get('_type', 'video') == 'video' assert info_dict.get('_type', 'video') == 'video'

View file

@ -481,4 +481,5 @@ def main(argv=None):
sys.exit('\nERROR: Interrupted by user') sys.exit('\nERROR: Interrupted by user')
__all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors'] __all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors']

View file

@ -2997,6 +2997,12 @@ else:
def compat_ctypes_WINFUNCTYPE(*args, **kwargs): def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
return ctypes.WINFUNCTYPE(*args, **kwargs) return ctypes.WINFUNCTYPE(*args, **kwargs)
# Error raised when a download fails with an HTTP 403 (Forbidden) response
class Forbidden403(Exception):
    """Signal that a download was interrupted by an HTTP 403 error.

    playlist_pos -- position in the playlist at which the download stopped.
                    It is formatted with %d in the message, so it must be
                    a number.
    """

    def __init__(self, playlist_pos):
        # Initialise the base class explicitly: on Python 2 an overridden
        # __init__ that skips this leaves `args` empty, which makes repr()
        # uninformative and breaks pickling/copying of the exception.
        super(Forbidden403, self).__init__(playlist_pos)
        self.playlist_pos = playlist_pos

    def __str__(self):
        return "Downloaded playlist has stopped at %d" % self.playlist_pos
__all__ = [ __all__ = [
'compat_HTMLParseError', 'compat_HTMLParseError',

View file

@ -11,6 +11,7 @@ from .common import FileDownloader
from ..compat import ( from ..compat import (
compat_str, compat_str,
compat_urllib_error, compat_urllib_error,
Forbidden403
) )
from ..utils import ( from ..utils import (
ContentTooShortError, ContentTooShortError,
@ -183,9 +184,11 @@ class HttpFD(FileDownloader):
ctx.resume_len = 0 ctx.resume_len = 0
ctx.open_mode = 'wb' ctx.open_mode = 'wb'
return return
elif err.code < 500 or err.code >= 600: elif err.code != 403 and (err.code < 500 or err.code >= 600):
# Unexpected HTTP error # Unexpected HTTP error
raise raise
elif err.code == 403: # The famous 403 Forbidden error
raise Forbidden403(int(info_dict.get("playlist_pos")))
raise RetryDownload(err) raise RetryDownload(err)
except socket.error as err: except socket.error as err:
if err.errno != errno.ECONNRESET: if err.errno != errno.ECONNRESET: