From c1665414c5f4ea7e1ff3093054298de76e8951dc Mon Sep 17 00:00:00 2001 From: gntrazios Date: Mon, 20 Jun 2022 20:28:02 +0300 Subject: [PATCH] [Movies2Watch] Add new extractor --- youtube_dl/extractor/movies2watch.py | 44 ++++++++++++++++------------ 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/movies2watch.py b/youtube_dl/extractor/movies2watch.py index 689497174..653350a2c 100644 --- a/youtube_dl/extractor/movies2watch.py +++ b/youtube_dl/extractor/movies2watch.py @@ -4,35 +4,41 @@ from __future__ import unicode_literals from .common import InfoExtractor +# https://movies2watch.ru/movie/double-threat-wqyq6/1-full class Movies2WatchIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)' - _TEST = { - 'url': 'https://yourextractor.com/watch/42', - 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', + _VALID_URL = r'https?://movies2watch\.ru/movie/(?P<id>[^/?#&]+)/1-full' + _TESTS = [{ + 'url': 'https://movies2watch.ru/movie/double-threat-wqyq6/1-full', + 'md5': 'c4ce357bf745d4d27ef7f3b94c9a5dc9', 'info_dict': { - 'id': '42', + 'id': 'double-threat-wqyq6', 'ext': 'mp4', - 'title': 'Video title goes here', - 'thumbnail': r're:^https?://.*\.jpg$', - # TODO more properties, either as: - # * A value - # * MD5 checksum; start the string with md5: - # * A regular expression; start the string with re: - # * Any Python type (for example int or float) + 'title': 'Double Threat', + 'description': 'After skimming money from the mob, a beautiful young woman finds herself on the run with a kind stranger on a pilgrimage across the country to scatter his brother\'s ashes. In the heat of the moment, we quickly learn that her split personality comes in handy as the ruthless, dynamic side of her is unstoppable.' 
} - } + }, { + 'url': 'https://movies2watch.ru/movie/the-batman-j2lx4/1-full', + 'md5': 'a6824ac8f96cdbf839a258493384ea5e', + 'info_dict': { + 'id': 'the-batman-j2lx4', + 'ext': 'mp4', + 'title': 'The Batman', + 'description': 'Two years of nights have turned Bruce Wayne into a nocturnal animal. But as he continues to find his way as Gotham\'s dark knight Bruce is forced into a game of cat and mouse with his biggest threat so far, a manic killer known as "The Riddler" who is filled with rage and determined to expose the corrupt system whilst picking off all of Gotham\'s key political figures. Working with both established and new allies, Bruce must track down the killer and see him brought to justice, while investigating his father\'s true legacy and questioning the affect that he has had on Gotham so far as "The Batman."' + } + }] def _real_extract(self, url): + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - # TODO more code goes here, for example ... - title = self._html_search_regex(r'

(.+?)

', webpage, 'title') + title = self._html_search_regex(r'

(.+?)

', webpage, 'title') + description = self._html_search_regex(r'
(.+?)
', webpage, 'description') return { + 'url': url, 'id': video_id, 'title': title, - 'description': self._og_search_description(webpage), - 'uploader': self._search_regex(r']+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False), - # TODO more properties (see youtube_dl/extractor/common.py) - } \ No newline at end of file + 'description': description, + 'ext': 'mp4' + }