class TestKtPlayerHelper(unittest.TestCase):
    """Unit tests for the kt_player URL de-obfuscation helpers."""

    def test_kt_player_helper_lc(self):
        # (license code, expected derived digit string)
        cases = (
            ('$385023312702592', '49618502835613441220119020166725'),
            ('$518170117095338', '62924140695851455899788411700698'),
        )
        for lic_code, expected in cases:
            self.assertEqual(
                KtPlayerHelper._hash_kt_player_lic_code(lic_code), expected)

    def test_kt_player_helper_hash_convert(self):
        # (license code, scrambled hash, expected real hash)
        cases = (
            ('$385023312702592',
             'bed397181d043299c43f63582406a20b',
             '8b0bdf194430202ed49325c186633a79'),
            ('$518170117095338',
             '8b25b576ffbf46fa3dc91e34eddc2190b7d3146586',
             'f34c6dff1f890e75b6b59422dde3b1acb7d3146586'),
        )
        for lic_code, scrambled, expected in cases:
            self.assertEqual(
                KtPlayerHelper.convert_video_hash(lic_code, scrambled),
                expected)

    def test_get_url(self):
        first_page = """
            var flashvars = {
                license_code: '$385023312702592',
                video_url: 'http://example.com/get_file/2/bed397181d043299c43f63582406a20b/223000/223101/223101.mp4/',
            }
        """
        first_expected = (
            'http://example.com/get_file/2/'
            '8b0bdf194430202ed49325c186633a79/223000/223101/223101.mp4/')
        self.assertEqual(KtPlayerHelper.get_url(first_page), first_expected)

        second_page = """
            var flashvars = {
                license_code: '$518170117095338',
                video_url: 'http://example.com/get_file/2/8b25b576ffbf46fa3dc91e34eddc2190b7d3146586/223000/223101/223101.mp4/',
            }
        """
        second_expected = (
            'http://example.com/get_file/2/'
            'f34c6dff1f890e75b6b59422dde3b1acb7d3146586/223000/223101/223101.mp4/')
        self.assertEqual(KtPlayerHelper.get_url(second_page), second_expected)
class KtPlayerHelper(object):
    """Utility functions for the video URL re-encoding performed by kt_player.

    kt_player is the player used by cambro, camhub, etc.
    """

    @staticmethod
    def _hash_kt_player_lic_code(code):
        """Derive the digit string that drives the video hash unscrambling.

        The algorithm was extracted from obfuscated JS.

        in:  '$385023312702592'
        out: '49618502835613441220119020166725'

        :param code: license code; its first character is skipped and every
            following character must be a decimal digit.
        :return: the derived digit string, or ``code`` itself when it is
            falsy (``None`` or empty).
        :raises ValueError: if a character after the first is not a digit.
        :raises IndexError: if the intermediate value has fewer digits than
            the number of rounds, or the code is too short for the rounds.
        """
        if not code:
            return code

        # Same digits as the code (minus the leading character), with every
        # zero replaced by one.
        code_no_zeros = ''.join(str(int(ch) or 1) for ch in code[1:])
        mid = len(code_no_zeros) // 2
        # The two halves overlap by one digit at position ``mid``.
        left = int(code_no_zeros[:mid + 1])
        right = int(code_no_zeros[mid:])
        # (|right - left| + |left - right|) * 2
        val = str(4 * abs(right - left))

        pieces = []
        for i in range(mid + 1):
            addend = int(val[i])
            for j in range(1, 5):
                # Digit-wise addition modulo 10; note that the *original*
                # code (zeros included) is used here, not code_no_zeros.
                pieces.append(str((int(code[i + j]) + addend) % 10))
        return ''.join(pieces)

    @staticmethod
    def convert_video_hash(lic_code, orig_hash, limit=32):
        """Convert a scrambled video URL hash into the real one.

        The converter was extracted from obfuscated JS.

        input  '$385023312702592', 'bed397181d043299c43f63582406a20b'
        output '8b0bdf194430202ed49325c186633a79'
        input  '$518170117095338', '8b25b576ffbf46fa3dc91e34eddc2190b7d3146586'
        output 'f34c6dff1f890e75b6b59422dde3b1acb7d3146586'

        In order to find the corresponding code in cambro.tv/camhub.com
        scripts do the following:
        1. Set a breakpoint at kt_start
        2. Execute in CDT console when triggered:
            flashvars._video_url = flashvars.video_url;
            Object.defineProperty(flashvars, 'video_url', {
              get: function () {
                return flashvars._video_url;
              },
              set: function (value) {
                debugger;
                flashvars._video_url = value;
              }
            });
        3. The second break is where the url re-encoding happens

        :param lic_code: license code, see ``_hash_kt_player_lic_code``.
        :param orig_hash: scrambled hash taken from the video URL.
        :param limit: number of leading characters of ``orig_hash`` that are
            unscrambled; anything beyond is appended unchanged.
        :return: the unscrambled hash.
        """
        digits = KtPlayerHelper._hash_kt_player_lic_code(lic_code)
        head = list(orig_hash[:limit])
        size = len(head)
        if not size:
            return orig_hash[limit:]

        # Position k is swapped with (k + sum(digits[k:])) mod size, walking
        # from the last position to the first.  Keeping a running suffix sum
        # avoids re-adding the same digits on every step.
        suffix_sum = sum(int(d) for d in digits[size:])
        for k in range(size - 1, -1, -1):
            if k < len(digits):
                suffix_sum += int(digits[k])
            other = (k + suffix_sum) % size
            head[k], head[other] = head[other], head[k]
        return ''.join(head) + orig_hash[limit:]

    @staticmethod
    def get_url(webpage):
        """Extract the real video URL from a page containing kt_player flashvars.

        :param webpage: page source holding ``license_code: '...'`` and
            ``video_url: '...'`` entries.
        :return: the video URL with its hash component converted, or ``None``
            when either entry cannot be found.
        :raises ExtractorError: if the URL has fewer than six ``/``-separated
            components counted from the scheme.
        """
        def first_group(pattern):
            mobj = re.search(pattern, webpage)
            return mobj.group(1) if mobj else None

        # extract video url
        license_code = first_group(r'license_code:\s+\'(.+?)\'')
        video_raw_url = first_group(r'video_url:\s+\'(.+?)\'')
        if not license_code or not video_raw_url:
            return None

        # decode a real video url
        parts = video_raw_url.split('/')
        video_pre_parts = []

        # Cut some junk at the beginning.  Stop at the FIRST component that
        # starts with 'http' (the scheme); otherwise a later component such
        # as a host named 'httpcdn.example.com' would shift the hash index.
        for idx, part in enumerate(parts):
            if part.startswith('http'):
                video_pre_parts = parts[idx:]
                break
        if len(video_pre_parts) < 6:
            # it is expected to be
            # http://example.com/get_file/2/1039a5cd2f433e4d41adf41e0afc1773/223000/223101/223101.mp4/
            # with a hash value as 5th component
            raise ExtractorError('url too short: %s' % (video_pre_parts, ))

        # convert video hash to a real one
        video_pre_parts[5] = KtPlayerHelper.convert_video_hash(
            license_code, video_pre_parts[5])
        return '/'.join(video_pre_parts)
+ """ + + _DURATION_RE = None + _UPLOADED_RE = None + + def _kt_extract(self, url, embedded=False): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + title = mobj.group('title') + website = mobj.group('site') + + webpage = self._download_webpage(url, video_id) + + if 'This video is a private video' in webpage: + raise ExtractorError( + 'Video %s is private' % video_id, expected=True) + + flashdata = webpage + if embedded: + # find the iframe with a player + iframe_src = self._html_search_regex( + r'