From 907530d6d3e2fb6a0a8e329584ec471c9694ed11 Mon Sep 17 00:00:00 2001 From: df Date: Mon, 4 Oct 2021 03:15:24 +0100 Subject: [PATCH] Add _match_valid_url() method to InfoExtractor per yt-dlp Also adjust lazy extractor setup to align and for Py2 --- devscripts/make_lazy_extractors.py | 14 ++++++++++---- youtube_dl/extractor/common.py | 18 ++++++++++++------ 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py index 878ae72b1..efc7b0ed7 100644 --- a/devscripts/make_lazy_extractors.py +++ b/devscripts/make_lazy_extractors.py @@ -11,8 +11,12 @@ print('WARNING: Lazy loading extractors is an experimental feature that may not sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) lazy_extractors_filename = sys.argv[1] -if os.path.exists(lazy_extractors_filename): - os.remove(lazy_extractors_filename) +# Py2: may be confused by left-over lazy_extractors.pyc +for x in ('', 'c'): + try: + os.remove(lazy_extractors_filename + x) + except OSError: + pass from youtube_dl.extractor import _ALL_CLASSES from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor @@ -21,7 +25,9 @@ with open('devscripts/lazy_load_template.py', 'rt') as f: module_template = f.read() module_contents = [ - module_template + '\n' + getsource(InfoExtractor.suitable) + '\n', + module_template, + (lambda cls: ('_match_valid_url' in cls.__dict__) and getsource(cls._match_valid_url))(InfoExtractor), + getsource(InfoExtractor.suitable), '', 'class LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n'] ie_template = ''' @@ -94,7 +100,7 @@ for ie in ordered_cls: module_contents.append( '_ALL_CLASSES = [{0}]'.format(', '.join(names))) -module_src = '\n'.join(module_contents) + '\n' +module_src = '\n'.join(filter(lambda x: x is not None, module_contents)) + '\n' with io.open(lazy_extractors_filename, 'wt', encoding='utf-8') as f: f.write(module_src) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 797c35fd5..227c1f2fd 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -404,21 +404,27 @@ class InfoExtractor(object): self.set_downloader(downloader) @classmethod - def suitable(cls, url): - """Receives a URL and returns True if suitable for this IE.""" + def _match_valid_url(cls, url): + """Receives a URL and returns match against the IE's _VALID_URL.""" # This does not use has/getattr intentionally - we want to know whether # we have cached the regexp for *this* class, whereas getattr would also # match the superclass if '_VALID_URL_RE' not in cls.__dict__: cls._VALID_URL_RE = re.compile(cls._VALID_URL) - return cls._VALID_URL_RE.match(url) is not None + return cls._VALID_URL_RE.match(url) + + @classmethod + def suitable(cls, url): + """Receives a URL and returns True if suitable for this IE.""" + + return cls._match_valid_url(url) is not None @classmethod def _match_id(cls, url): - if '_VALID_URL_RE' not in cls.__dict__: - cls._VALID_URL_RE = re.compile(cls._VALID_URL) - m = cls._VALID_URL_RE.match(url) + """Extracts the ID from the URL""" + m = cls._match_valid_url(url) + # Must have matched in suitable() assert m return compat_str(m.group('id'))