[Callin] Add new extractor

This commit is contained in:
Ruowang Sun 2022-12-11 20:35:11 -05:00
commit f01cb57630

View file

@ -5,8 +5,10 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
traverse_obj, traverse_obj,
try_get,
) )
class CallinIE(InfoExtractor): class CallinIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?callin\.com/episode/(?:[^/#?-]+-)*(?P<id>[^/#?-]+)' _VALID_URL = r'https?://(?:www\.)?callin\.com/episode/(?:[^/#?-]+-)*(?P<id>[^/#?-]+)'
_TESTS = [{ _TESTS = [{
@ -39,31 +41,29 @@ class CallinIE(InfoExtractor):
def _real_extract(self, url):
    """Extract title, description and m3u8 formats for one Callin episode.

    The episode metadata is read from the data returned by
    ``_search_nextjs_data`` for the episode page, under
    ``props -> pageProps -> episode``. Title and description fall back to
    the page's OpenGraph values when the episode object lacks them.

    Args:
        url: Episode page URL matching ``_VALID_URL``.

    Returns:
        An info dict with ``id``, ``title``, ``description`` and ``formats``.

    Raises:
        ExtractorError: If no episode object (a dict) is found in the page
            data.
    """
    video_id = self._match_id(url)
    webpage = self._download_webpage(url, video_id)

    # Parse the page data once and do a single, type-checked lookup;
    # expected_type=dict keeps the .get() calls below from hitting a
    # non-mapping value.
    next_data = self._search_nextjs_data(webpage, video_id)
    episode = traverse_obj(
        next_data, ('props', 'pageProps', 'episode'), expected_type=dict)
    if not episode:
        # This guards the episode object itself, not the m3u8 URL.
        raise ExtractorError('Failed to find episode data')

    title = episode.get('title') or self._og_search_title(webpage)
    description = (
        episode.get('description') or self._og_search_description(webpage))

    formats = []
    m3u8_url = episode.get('m3u8')
    if m3u8_url:
        # fatal=False: a broken playlist should not abort the extraction here.
        formats.extend(self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4', fatal=False))

    return {
        'id': video_id,
        'title': title,
        'description': description,
        'formats': formats,
    }