mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2025-08-23 06:35:51 -07:00
[Movies2Watch] Add new extractor
This commit is contained in:
parent
d4701d8de2
commit
c1665414c5
1 changed file with 25 additions and 19 deletions
|
@ -4,35 +4,41 @@ from __future__ import unicode_literals
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
# https://movies2watch.ru/movie/double-threat-wqyq6/1-full
|
||||||
class Movies2WatchIE(InfoExtractor):
|
class Movies2WatchIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://movies2watch\.ru/movie/(?P<id>[^/?#&]+)/1-full'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://yourextractor.com/watch/42',
|
'url': 'https://movies2watch.ru/movie/double-threat-wqyq6/1-full',
|
||||||
'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
|
'md5': 'c4ce357bf745d4d27ef7f3b94c9a5dc9',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '42',
|
'id': 'double-threat-wqyq6',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Video title goes here',
|
'title': 'Double Threat',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'description': 'After skimming money from the mob, a beautiful young woman finds herself on the run with a kind stranger on a pilgrimage across the country to scatter his brother\'s ashes. In the heat of the moment, we quickly learn that her split personality comes in handy as the ruthless, dynamic side of her is unstoppable.'
|
||||||
# TODO more properties, either as:
|
|
||||||
# * A value
|
|
||||||
# * MD5 checksum; start the string with md5:
|
|
||||||
# * A regular expression; start the string with re:
|
|
||||||
# * Any Python type (for example int or float)
|
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://movies2watch.ru/movie/the-batman-j2lx4/1-full',
|
||||||
|
'md5': 'a6824ac8f96cdbf839a258493384ea5e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'the-batman-j2lx4',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'The Batman',
|
||||||
|
'description': 'Two years of nights have turned Bruce Wayne into a nocturnal animal. But as he continues to find his way as Gotham\'s dark knight Bruce is forced into a game of cat and mouse with his biggest threat so far, a manic killer known as "The Riddler" who is filled with rage and determined to expose the corrupt system whilst picking off all of Gotham\'s key political figures. Working with both established and new allies, Bruce must track down the killer and see him brought to justice, while investigating his father\'s true legacy and questioning the affect that he has had on Gotham so far as "The Batman."'
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
# TODO more code goes here, for example ...
|
title = self._html_search_regex(r'<h1 itemprop="name" class="title">(.+?)</h1>', webpage, 'title')
|
||||||
title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
|
description = self._html_search_regex(r'<div itemprop="description" class="desc shorting" data-type="text">(.+?)</div>', webpage, 'description')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
'url': url,
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': self._og_search_description(webpage),
|
'description': description,
|
||||||
'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
|
'ext': 'mp4'
|
||||||
# TODO more properties (see youtube_dl/extractor/common.py)
|
|
||||||
}
|
}
|
Loading…
Add table
Add a link
Reference in a new issue