[Movies2Watch] Add new extractor

2025-08-23 06:35:51 -07:00 · 2022-06-20 20:28:02 +03:00 · 2022-06-20 20:28:02 +03:00 · c1665414c5
commit c1665414c5
parent d4701d8de2
1 changed file with 25 additions and 19 deletions
--- a/youtube_dl/extractor/movies2watch.py
+++ b/youtube_dl/extractor/movies2watch.py
@@ -4,35 +4,41 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 # https://movies2watch.ru/movie/double-threat-wqyq6/1-full
 class Movies2WatchIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://movies2watch\.ru/movie/(?P<id>[^/?#&]+)/1-full'
-    _TEST = {
+    _TESTS = [{
-        'url': 'https://yourextractor.com/watch/42',
+        'url': 'https://movies2watch.ru/movie/double-threat-wqyq6/1-full',
-        'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
+        'md5': 'c4ce357bf745d4d27ef7f3b94c9a5dc9',
        'info_dict': {
-            'id': '42',
+            'id': 'double-threat-wqyq6',
            'ext': 'mp4',
-            'title': 'Video title goes here',
+            'title': 'Double Threat',
-            'thumbnail': r're:^https?://.*\.jpg$',
+            'description': 'After skimming money from the mob, a beautiful young woman finds herself on the run with a kind stranger on a pilgrimage across the country to scatter his brother\'s ashes. In the heat of the moment, we quickly learn that her split personality comes in handy as the ruthless, dynamic side of her is unstoppable.'
            # TODO more properties, either as:
            # * A value
            # * MD5 checksum; start the string with md5:
            # * A regular expression; start the string with re:
            # * Any Python type (for example int or float)
        }
-    }
+    }, {
        'url': 'https://movies2watch.ru/movie/the-batman-j2lx4/1-full',
        'md5': 'a6824ac8f96cdbf839a258493384ea5e',
        'info_dict': {
            'id': 'the-batman-j2lx4',
            'ext': 'mp4',
            'title': 'The Batman',
            'description': 'Two years of nights have turned Bruce Wayne into a nocturnal animal. But as he continues to find his way as Gotham\'s dark knight Bruce is forced into a game of cat and mouse with his biggest threat so far, a manic killer known as "The Riddler" who is filled with rage and determined to expose the corrupt system whilst picking off all of Gotham\'s key political figures. Working with both established and new allies, Bruce must track down the killer and see him brought to justice, while investigating his father\'s true legacy and questioning the affect that he has had on Gotham so far as "The Batman."'
        }
    }]
    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
-        # TODO more code goes here, for example ...
+        title = self._html_search_regex(r'<h1 itemprop="name" class="title">(.+?)</h1>', webpage, 'title')
-        title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
+        description = self._html_search_regex(r'<div itemprop="description" class="desc shorting" data-type="text">(.+?)</div>', webpage, 'description')
        return {
            'url': url,
            'id': video_id,
            'title': title,
-            'description': self._og_search_description(webpage),
+            'description': description,
-            'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
+            'ext': 'mp4'
            # TODO more properties (see youtube_dl/extractor/common.py)
        }