Add _match_valid_url() method to InfoExtractor per yt-dlp

Also adjust the lazy extractor setup to align with this change and to work on Py2
2025-08-23 06:35:51 -07:00 · 2021-10-04 03:15:24 +01:00 · 2021-10-04 03:15:24 +01:00 · 907530d6d3
commit 907530d6d3
parent a803582717
2 changed files with 22 additions and 10 deletions
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@ -11,8 +11,12 @@ print('WARNING: Lazy loading extractors is an experimental feature that may not
 sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
 lazy_extractors_filename = sys.argv[1]
-if os.path.exists(lazy_extractors_filename):
+# Py2: may be confused by left-over lazy_extractors.pyc
-    os.remove(lazy_extractors_filename)
+for x in ('', 'c'):
    try:
        os.remove(lazy_extractors_filename + x)
    except OSError:
        pass
 from youtube_dl.extractor import _ALL_CLASSES
 from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor
@ -21,7 +25,9 @@ with open('devscripts/lazy_load_template.py', 'rt') as f:
    module_template = f.read()
 module_contents = [
-    module_template + '\n' + getsource(InfoExtractor.suitable) + '\n',
+    module_template,
    (lambda cls: ('_match_valid_url' in cls.__dict__) and getsource(cls._match_valid_url))(InfoExtractor),
    getsource(InfoExtractor.suitable), '',
    'class LazyLoadSearchExtractor(LazyLoadExtractor):\n    pass\n']
 ie_template = '''
@ -94,7 +100,7 @@ for ie in ordered_cls:
 module_contents.append(
    '_ALL_CLASSES = [{0}]'.format(', '.join(names)))
-module_src = '\n'.join(module_contents) + '\n'
+module_src = '\n'.join(filter(lambda x: x is not None, module_contents)) + '\n'
 with io.open(lazy_extractors_filename, 'wt', encoding='utf-8') as f:
    f.write(module_src)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -404,21 +404,27 @@ class InfoExtractor(object):
        self.set_downloader(downloader)
    @classmethod
-    def suitable(cls, url):
+    def _match_valid_url(cls, url):
-        """Receives a URL and returns True if suitable for this IE."""
+        """Receives a URL and returns match against the IE's _VALID_URL."""
        # This does not use has/getattr intentionally - we want to know whether
        # we have cached the regexp for *this* class, whereas getattr would also
        # match the superclass
        if '_VALID_URL_RE' not in cls.__dict__:
            cls._VALID_URL_RE = re.compile(cls._VALID_URL)
-        return cls._VALID_URL_RE.match(url) is not None
+        return cls._VALID_URL_RE.match(url)
    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        return cls._match_valid_url(url) is not None
    @classmethod
    def _match_id(cls, url):
-        if '_VALID_URL_RE' not in cls.__dict__:
+        """Extracts the ID from the URL"""
-            cls._VALID_URL_RE = re.compile(cls._VALID_URL)
+        m = cls._match_valid_url(url)
-        m = cls._VALID_URL_RE.match(url)
+        # Must have matched in suitable()
        assert m
        return compat_str(m.group('id'))