Add _match_valid_url() method to InfoExtractor per yt-dlp

Also adjust the lazy extractor setup to match the new method, and to cope with Py2 (which may be confused by a left-over lazy_extractors.pyc)
This commit is contained in:
df 2021-10-04 03:15:24 +01:00
commit 907530d6d3
2 changed files with 22 additions and 10 deletions

View file

@@ -11,8 +11,12 @@ print('WARNING: Lazy loading extractors is an experimental feature that may not
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
lazy_extractors_filename = sys.argv[1] lazy_extractors_filename = sys.argv[1]
if os.path.exists(lazy_extractors_filename): # Py2: may be confused by left-over lazy_extractors.pyc
os.remove(lazy_extractors_filename) for x in ('', 'c'):
try:
os.remove(lazy_extractors_filename + x)
except OSError:
pass
from youtube_dl.extractor import _ALL_CLASSES from youtube_dl.extractor import _ALL_CLASSES
from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor
@@ -21,7 +25,9 @@ with open('devscripts/lazy_load_template.py', 'rt') as f:
module_template = f.read() module_template = f.read()
module_contents = [ module_contents = [
module_template + '\n' + getsource(InfoExtractor.suitable) + '\n', module_template,
(lambda cls: ('_match_valid_url' in cls.__dict__) and getsource(cls._match_valid_url))(InfoExtractor),
getsource(InfoExtractor.suitable), '',
'class LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n'] 'class LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n']
ie_template = ''' ie_template = '''
@@ -94,7 +100,7 @@ for ie in ordered_cls:
module_contents.append( module_contents.append(
'_ALL_CLASSES = [{0}]'.format(', '.join(names))) '_ALL_CLASSES = [{0}]'.format(', '.join(names)))
module_src = '\n'.join(module_contents) + '\n' module_src = '\n'.join(filter(lambda x: x is not None, module_contents)) + '\n'
with io.open(lazy_extractors_filename, 'wt', encoding='utf-8') as f: with io.open(lazy_extractors_filename, 'wt', encoding='utf-8') as f:
f.write(module_src) f.write(module_src)

View file

@@ -404,21 +404,27 @@ class InfoExtractor(object):
self.set_downloader(downloader) self.set_downloader(downloader)
@classmethod @classmethod
def suitable(cls, url): def _match_valid_url(cls, url):
"""Receives a URL and returns True if suitable for this IE.""" """Receives a URL and returns match against the IE's _VALID_URL."""
# This does not use has/getattr intentionally - we want to know whether # This does not use has/getattr intentionally - we want to know whether
# we have cached the regexp for *this* class, whereas getattr would also # we have cached the regexp for *this* class, whereas getattr would also
# match the superclass # match the superclass
if '_VALID_URL_RE' not in cls.__dict__: if '_VALID_URL_RE' not in cls.__dict__:
cls._VALID_URL_RE = re.compile(cls._VALID_URL) cls._VALID_URL_RE = re.compile(cls._VALID_URL)
return cls._VALID_URL_RE.match(url) is not None return cls._VALID_URL_RE.match(url)
@classmethod
def suitable(cls, url):
"""Receives a URL and returns True if suitable for this IE."""
return cls._match_valid_url(url) is not None
@classmethod @classmethod
def _match_id(cls, url): def _match_id(cls, url):
if '_VALID_URL_RE' not in cls.__dict__: """Extracts the ID from the URL"""
cls._VALID_URL_RE = re.compile(cls._VALID_URL) m = cls._match_valid_url(url)
m = cls._VALID_URL_RE.match(url) # Must have matched in suitable()
assert m assert m
return compat_str(m.group('id')) return compat_str(m.group('id'))