From 907530d6d3e2fb6a0a8e329584ec471c9694ed11 Mon Sep 17 00:00:00 2001
From: df <fieldhouse@gmx.net>
Date: Mon, 4 Oct 2021 03:15:24 +0100
Subject: [PATCH] Add _match_valid_url() method to InfoExtractor per yt-dlp

Also adjust the lazy extractor setup to match, and to cope with a left-over .pyc file under Py2
---
 devscripts/make_lazy_extractors.py | 14 ++++++++++----
 youtube_dl/extractor/common.py     | 18 ++++++++++++------
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
index 878ae72b1..efc7b0ed7 100644
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -11,8 +11,12 @@ print('WARNING: Lazy loading extractors is an experimental feature that may not
 sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
 
 lazy_extractors_filename = sys.argv[1]
-if os.path.exists(lazy_extractors_filename):
-    os.remove(lazy_extractors_filename)
+# Py2: may be confused by left-over lazy_extractors.pyc
+for x in ('', 'c'):
+    try:
+        os.remove(lazy_extractors_filename + x)
+    except OSError:
+        pass
 
 from youtube_dl.extractor import _ALL_CLASSES
 from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor
@@ -21,7 +25,9 @@ with open('devscripts/lazy_load_template.py', 'rt') as f:
     module_template = f.read()
 
 module_contents = [
-    module_template + '\n' + getsource(InfoExtractor.suitable) + '\n',
+    module_template,
+    (lambda cls: getsource(cls._match_valid_url) if '_match_valid_url' in cls.__dict__ else None)(InfoExtractor),  # None is filtered out below
+    getsource(InfoExtractor.suitable), '',
     'class LazyLoadSearchExtractor(LazyLoadExtractor):\n    pass\n']
 
 ie_template = '''
@@ -94,7 +100,7 @@ for ie in ordered_cls:
 module_contents.append(
     '_ALL_CLASSES = [{0}]'.format(', '.join(names)))
 
-module_src = '\n'.join(module_contents) + '\n'
+module_src = '\n'.join(filter(lambda x: x is not None, module_contents)) + '\n'
 
 with io.open(lazy_extractors_filename, 'wt', encoding='utf-8') as f:
     f.write(module_src)
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 797c35fd5..227c1f2fd 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -404,21 +404,27 @@ class InfoExtractor(object):
         self.set_downloader(downloader)
 
     @classmethod
-    def suitable(cls, url):
-        """Receives a URL and returns True if suitable for this IE."""
+    def _match_valid_url(cls, url):
+        """Receives a URL and returns match against the IE's _VALID_URL."""
 
         # This does not use has/getattr intentionally - we want to know whether
         # we have cached the regexp for *this* class, whereas getattr would also
         # match the superclass
         if '_VALID_URL_RE' not in cls.__dict__:
             cls._VALID_URL_RE = re.compile(cls._VALID_URL)
-        return cls._VALID_URL_RE.match(url) is not None
+        return cls._VALID_URL_RE.match(url)
+
+    @classmethod
+    def suitable(cls, url):
+        """Receives a URL and returns True if suitable for this IE."""
+
+        return cls._match_valid_url(url) is not None
 
     @classmethod
     def _match_id(cls, url):
-        if '_VALID_URL_RE' not in cls.__dict__:
-            cls._VALID_URL_RE = re.compile(cls._VALID_URL)
-        m = cls._VALID_URL_RE.match(url)
+        """Extracts the ID from the URL"""
+        m = cls._match_valid_url(url)
+        # Must have matched in suitable()
         assert m
         return compat_str(m.group('id'))