From 0b2889705574620ac2d7d556956693bb4eb0a881 Mon Sep 17 00:00:00 2001 From: Youssef Hajjioui Date: Mon, 24 Jan 2022 10:03:38 -0500 Subject: [PATCH 1/8] Fix for HTTP Error 403 Forbidden This fix was applied for mainly downloading playlists from youtube music. Each time the error is raised it will store the position in playlist at which the download was interrupted by the error. And it will launch again in a recursion. The way it does this, is through recursion in the wrapper function within function `__handle_extraction_exceptions` --- youtube_dl/YoutubeDL.py | 11 +++++++++++ youtube_dl/__init__.py | 1 + youtube_dl/compat.py | 6 ++++++ youtube_dl/downloader/http.py | 5 ++++- 4 files changed, 22 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index fe30758ef..4a3cefb3c 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -41,6 +41,7 @@ from .compat import ( compat_urllib_error, compat_urllib_request, compat_urllib_request_DataHandler, + Forbidden403 ) from .utils import ( age_restricted, @@ -112,6 +113,7 @@ from .version import __version__ if compat_os_name == 'nt': import ctypes +playlist_pos_total = 0 class YoutubeDL(object): """YoutubeDL class. @@ -361,6 +363,7 @@ class YoutubeDL(object): } self.params.update(params) self.cache = Cache(self) + self.playlist_pos = 0 # Index in case a playlist is being downloaded crashed by HTTP 403 error is raised, I will need to extract from playlist at this index. def check_deprecated(param, option, suggestion): if self.params.get(param) is not None: @@ -824,6 +827,12 @@ class YoutubeDL(object): self.report_error(compat_str(e), e.format_traceback()) except MaxDownloadsReached: raise + except Forbidden403 as err: + # Update the pos to start in playlist to the one at which the error occurred. 
+ global playlist_pos_total + playlist_pos_total += err.playlist_pos + self.params['playliststart'] = playlist_pos_total + wrapper(self, *args, **kwargs) # Execute the wrapper function. except Exception as e: if self.params.get('ignoreerrors', False): self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc())) @@ -1040,6 +1049,7 @@ class YoutubeDL(object): x_forwarded_for = ie_result.get('__x_forwarded_for_ip') for i, entry in enumerate(entries, 1): + self.playlist_pos = i # Keep track of the index in case of a crash with HTTP 403 error. self.to_screen('[download] Downloading video %s of %s' % (i, n_entries)) # This __x_forwarded_for_ip thing is a bit ugly but requires # minimal changes @@ -1775,6 +1785,7 @@ class YoutubeDL(object): def process_info(self, info_dict): """Process a single resolved IE result.""" + info_dict['playlist_pos'] = self.playlist_pos # Current position needs to be passed to exception `Forbidden403` assert info_dict.get('_type', 'video') == 'video' diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index e1bd67919..fcd619373 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -481,4 +481,5 @@ def main(argv=None): sys.exit('\nERROR: Interrupted by user') + __all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors'] diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 9e45c454b..6d5df3f74 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2997,6 +2997,12 @@ else: def compat_ctypes_WINFUNCTYPE(*args, **kwargs): return ctypes.WINFUNCTYPE(*args, **kwargs) +# Implement an error for which it is raise when error code 403 HTTPError +class Forbidden403(Exception): + def __init__(self, playlist_pos): + self.playlist_pos = playlist_pos + def __str__(self): + return "Downloaded playlist has stopped at %d" % self.playlist_pos __all__ = [ 'compat_HTMLParseError', diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index d8ac41dcc..6254beada 100644 
--- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -11,6 +11,7 @@ from .common import FileDownloader from ..compat import ( compat_str, compat_urllib_error, + Forbidden403 ) from ..utils import ( ContentTooShortError, @@ -183,9 +184,11 @@ class HttpFD(FileDownloader): ctx.resume_len = 0 ctx.open_mode = 'wb' return - elif err.code < 500 or err.code >= 600: + elif err.code != 403 and (err.code < 500 or err.code >= 600): # Unexpected HTTP error raise + elif err.code == 403: # The famous 403 Forbidden error + raise Forbidden403(int(info_dict.get("playlist_pos"))) raise RetryDownload(err) except socket.error as err: if err.errno != errno.ECONNRESET: From 8cc6857f22387d10eec04af25e67c0a7d3de435d Mon Sep 17 00:00:00 2001 From: Youssef Hajjioui Date: Thu, 27 Jan 2022 16:01:25 -0500 Subject: [PATCH 2/8] Added configuration files --- youtube-dl | 6 ++++++ youtube-dl.config | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100755 youtube-dl create mode 100644 youtube-dl.config diff --git a/youtube-dl b/youtube-dl new file mode 100755 index 000000000..fc3cc8ad8 --- /dev/null +++ b/youtube-dl @@ -0,0 +1,6 @@ +#!/usr/bin/env python + +import youtube_dl + +if __name__ == '__main__': + youtube_dl.main() diff --git a/youtube-dl.config b/youtube-dl.config new file mode 100644 index 000000000..7cb1320da --- /dev/null +++ b/youtube-dl.config @@ -0,0 +1,33 @@ +# File selection +--playlist-start 1 + +# Download options +--retries 3 +#--limit-rate nnn +# +# Filesystem options +#--batch-file files_here_of_urls +--output %(title)s.%(ext).s # The filename will hold the video title. And the extension at the end. +#--restrict-filenames # There will be no arabic song names +--no-overwrites # I may want to use this one in case I don't want to download the same files upon launching the script a second time. +--continue # Force to resume partially downloaded files. 
I say this is an experimental options and I want to check if the files I have downloaded are functional. +#--no-cache-dir + +# Verbosity and simulation +# --quiet # because this is a script +#--simulate # won't download, but just show what will happen, I may want to use this option in the beginning in order to verify what youtube-dl would do. Could be used together with `-v`. +#--verbose # for verbosity, prints debugging information. +#--print-traffic # I think this is good information for debugging. +#--dump-pages + +# Workarounds +#--sleep-interval 198 # Sleeping before attempting the next download. + +# Video Format Options +#--format worst +# Authentication + +# Post-processing options: +--add-metadata # Write all the metadata that could be gotten into the output file. +--no-post-overwrites # Do not overwrite post processed files (Keep audio files). This might save me time when I just need to update +-x # Convert videos to audio. If set it will not recognized already downloaded files, because they are converted in audio format, and so they will be overwritten when retrying after a failure. From 49fccd687bc07963c4c9afbe90474b9121705af9 Mon Sep 17 00:00:00 2001 From: Delusive Miraj Date: Fri, 28 Jan 2022 17:54:04 -0500 Subject: [PATCH 3/8] I set a time to sleep between each retry I set to sleep for a few seconds before each retry. --- youtube_dl/YoutubeDL.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 4a3cefb3c..1eacea152 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -832,6 +832,7 @@ class YoutubeDL(object): global playlist_pos_total playlist_pos_total += err.playlist_pos self.params['playliststart'] = playlist_pos_total + time.sleep(random.randint(3, 13)) # Lets not abuse of this 😜 we don't want to get brutal, on the server. wrapper(self, *args, **kwargs) # Execute the wrapper function. 
except Exception as e: if self.params.get('ignoreerrors', False): From ffa56f360345fb67f4ee24021d9130c908084453 Mon Sep 17 00:00:00 2001 From: Delusive Miraj Date: Fri, 28 Jan 2022 17:54:04 -0500 Subject: [PATCH 4/8] I set a time to sleep between each retry I set to sleep for a few seconds before each retry. --- youtube_dl/YoutubeDL.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 4a3cefb3c..9cbe3fc36 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -832,6 +832,7 @@ class YoutubeDL(object): global playlist_pos_total playlist_pos_total += err.playlist_pos self.params['playliststart'] = playlist_pos_total + time.sleep(random.randint(3, 13)) # Lets not abuse of this 😜 we don't want to get brutal on the server. wrapper(self, *args, **kwargs) # Execute the wrapper function. except Exception as e: if self.params.get('ignoreerrors', False): From dcb656f8c64c7509d779d38b4bd21e92dd912af3 Mon Sep 17 00:00:00 2001 From: Youssef Hajjioui Date: Sat, 29 Jan 2022 12:34:31 -0500 Subject: [PATCH 5/8] Removed cached files, in the index. I removed some files from the index. Files which are irrelevant for the PR. --- youtube-dl | 6 ------ youtube-dl.config | 33 --------------------------------- 2 files changed, 39 deletions(-) delete mode 100755 youtube-dl delete mode 100644 youtube-dl.config diff --git a/youtube-dl b/youtube-dl deleted file mode 100755 index fc3cc8ad8..000000000 --- a/youtube-dl +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env python - -import youtube_dl - -if __name__ == '__main__': - youtube_dl.main() diff --git a/youtube-dl.config b/youtube-dl.config deleted file mode 100644 index 7cb1320da..000000000 --- a/youtube-dl.config +++ /dev/null @@ -1,33 +0,0 @@ -# File selection ---playlist-start 1 - -# Download options ---retries 3 -#--limit-rate nnn -# -# Filesystem options -#--batch-file files_here_of_urls ---output %(title)s.%(ext).s # The filename will hold the video title. 
And the extension at the end. -#--restrict-filenames # There will be no arabic song names ---no-overwrites # I may want to use this one in case I don't want to download the same files upon launching the script a second time. ---continue # Force to resume partially downloaded files. I say this is an experimental options and I want to check if the files I have downloaded are functional. -#--no-cache-dir - -# Verbosity and simulation -# --quiet # because this is a script -#--simulate # won't download, but just show what will happen, I may want to use this option in the beginning in order to verify what youtube-dl would do. Could be used together with `-v`. -#--verbose # for verbosity, prints debugging information. -#--print-traffic # I think this is good information for debugging. -#--dump-pages - -# Workarounds -#--sleep-interval 198 # Sleeping before attempting the next download. - -# Video Format Options -#--format worst -# Authentication - -# Post-processing options: ---add-metadata # Write all the metadata that could be gotten into the output file. ---no-post-overwrites # Do not overwrite post processed files (Keep audio files). This might save me time when I just need to update --x # Convert videos to audio. If set it will not recognized already downloaded files, because they are converted in audio format, and so they will be overwritten when retrying after a failure. From 6d447d14c71a5f72769aa760325f281a0897f3e8 Mon Sep 17 00:00:00 2001 From: Youssef Hajjioui Date: Sat, 29 Jan 2022 19:06:52 -0500 Subject: [PATCH 6/8] I made a clean up I removed some of the code that I previously added which is not needed to fix the error. (comments, tests...)
--- bin/youtube-dl | 3 +++ youtube_dl/YoutubeDL.py | 14 +++----------- youtube_dl/compat.py | 5 +---- youtube_dl/downloader/http.py | 2 +- 4 files changed, 8 insertions(+), 16 deletions(-) diff --git a/bin/youtube-dl b/bin/youtube-dl index fc3cc8ad8..04fc04aae 100755 --- a/bin/youtube-dl +++ b/bin/youtube-dl @@ -1,6 +1,9 @@ #!/usr/bin/env python import youtube_dl +import sys + +sys.argv = ["", "--config-location", "youtube-dl.config", "https://music.youtube.com/playlist?list=PLmq8d_1q7d1Wp4kfF3DUlaWoQK0PM_D76&feature=share"] if __name__ == '__main__': youtube_dl.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 9cbe3fc36..018c680d7 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -113,8 +113,6 @@ from .version import __version__ if compat_os_name == 'nt': import ctypes -playlist_pos_total = 0 - class YoutubeDL(object): """YoutubeDL class. @@ -363,7 +361,6 @@ class YoutubeDL(object): } self.params.update(params) self.cache = Cache(self) - self.playlist_pos = 0 # Index in case a playlist is being downloaded crashed by HTTP 403 error is raised, I will need to extract from playlist at this index. def check_deprecated(param, option, suggestion): if self.params.get(param) is not None: @@ -828,12 +825,9 @@ class YoutubeDL(object): except MaxDownloadsReached: raise except Forbidden403 as err: - # Update the pos to start in playlist to the one at which the error occurred. - global playlist_pos_total - playlist_pos_total += err.playlist_pos - self.params['playliststart'] = playlist_pos_total - time.sleep(random.randint(3, 13)) # Lets not abuse of this 😜 we don't want to get brutal on the server. - wrapper(self, *args, **kwargs) # Execute the wrapper function. 
+ # Lets not abuse of this 😜 we don't want to get brutal on the server + time.sleep(random.randint(3, 13)) + wrapper(self, *args, **kwargs) except Exception as e: if self.params.get('ignoreerrors', False): self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc())) @@ -1050,7 +1044,6 @@ class YoutubeDL(object): x_forwarded_for = ie_result.get('__x_forwarded_for_ip') for i, entry in enumerate(entries, 1): - self.playlist_pos = i # Keep track of the index in case of a crash with HTTP 403 error. self.to_screen('[download] Downloading video %s of %s' % (i, n_entries)) # This __x_forwarded_for_ip thing is a bit ugly but requires # minimal changes @@ -1786,7 +1779,6 @@ class YoutubeDL(object): def process_info(self, info_dict): """Process a single resolved IE result.""" - info_dict['playlist_pos'] = self.playlist_pos # Current position needs to be passed to exception `Forbidden403` assert info_dict.get('_type', 'video') == 'video' diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 6d5df3f74..d83873339 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2999,10 +2999,7 @@ else: # Implement an error for which it is raise when error code 403 HTTPError class Forbidden403(Exception): - def __init__(self, playlist_pos): - self.playlist_pos = playlist_pos - def __str__(self): - return "Downloaded playlist has stopped at %d" % self.playlist_pos + pass __all__ = [ 'compat_HTMLParseError', diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 6254beada..f8f5cb9fa 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -188,7 +188,7 @@ class HttpFD(FileDownloader): # Unexpected HTTP error raise elif err.code == 403: # The famous 403 Forbidden error - raise Forbidden403(int(info_dict.get("playlist_pos"))) + raise Forbidden403 raise RetryDownload(err) except socket.error as err: if err.errno != errno.ECONNRESET: From a394706c835ce353098eb329e86a258be2bc220a Mon Sep 17 00:00:00 
2001 From: Youssef Hajjioui Date: Sat, 29 Jan 2022 19:38:32 -0500 Subject: [PATCH 7/8] Made my changes compliant to conventions I used flake8 to make my code compliant to conventions used in the project, and the community. --- youtube_dl/YoutubeDL.py | 3 ++- youtube_dl/__init__.py | 1 - youtube_dl/compat.py | 4 +++- youtube_dl/downloader/http.py | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 018c680d7..6a60b9fb6 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -113,6 +113,7 @@ from .version import __version__ if compat_os_name == 'nt': import ctypes + class YoutubeDL(object): """YoutubeDL class. @@ -824,7 +825,7 @@ class YoutubeDL(object): self.report_error(compat_str(e), e.format_traceback()) except MaxDownloadsReached: raise - except Forbidden403 as err: + except Forbidden403: # Lets not abuse of this 😜 we don't want to get brutal on the server time.sleep(random.randint(3, 13)) wrapper(self, *args, **kwargs) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index fcd619373..e1bd67919 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -481,5 +481,4 @@ def main(argv=None): sys.exit('\nERROR: Interrupted by user') - __all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors'] diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index d83873339..90a178c73 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2997,9 +2997,11 @@ else: def compat_ctypes_WINFUNCTYPE(*args, **kwargs): return ctypes.WINFUNCTYPE(*args, **kwargs) + # Implement an error for which it is raise when error code 403 HTTPError class Forbidden403(Exception): - pass + pass + __all__ = [ 'compat_HTMLParseError', diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index f8f5cb9fa..86d6f3441 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -187,7 +187,7 @@ class HttpFD(FileDownloader): elif err.code 
!= 403 and (err.code < 500 or err.code >= 600): # Unexpected HTTP error raise - elif err.code == 403: # The famous 403 Forbidden error + elif err.code == 403: # The famous 403 Forbidden error raise Forbidden403 raise RetryDownload(err) except socket.error as err: From 080f1f408cb3bbfc4beea9f029a77590d08fd682 Mon Sep 17 00:00:00 2001 From: Youssef Hajjioui Date: Sat, 29 Jan 2022 20:08:08 -0500 Subject: [PATCH 8/8] Corrected modifications I made for testing purposes I deleted some code that I used for testing purposes. Sorry for another commit, everyone. --- bin/youtube-dl | 3 --- 1 file changed, 3 deletions(-) diff --git a/bin/youtube-dl b/bin/youtube-dl index 04fc04aae..fc3cc8ad8 100755 --- a/bin/youtube-dl +++ b/bin/youtube-dl @@ -1,9 +1,6 @@ #!/usr/bin/env python import youtube_dl -import sys - -sys.argv = ["", "--config-location", "youtube-dl.config", "https://music.youtube.com/playlist?list=PLmq8d_1q7d1Wp4kfF3DUlaWoQK0PM_D76&feature=share"] if __name__ == '__main__': youtube_dl.main()