From 28f94a2828b5a3531ca529842e47c72a591f4c54 Mon Sep 17 00:00:00 2001
From: palewire
Date: Sun, 7 Aug 2022 08:03:17 -0700
Subject: [PATCH] [truth] Add new extractor

---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/truth.py      | 98 ++++++++++++++++++++++++++++++
 2 files changed, 99 insertions(+)
 create mode 100644 youtube_dl/extractor/truth.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 751fc38b6..4ecc1643b 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1295,6 +1295,7 @@ from .trovo import (
     TrovoVodIE,
 )
 from .trunews import TruNewsIE
+from .truth import TruthIE
 from .trutv import TruTVIE
 from .tube8 import Tube8IE
 from .tubitv import TubiTvIE
diff --git a/youtube_dl/extractor/truth.py b/youtube_dl/extractor/truth.py
new file mode 100644
index 000000000..5a5311eeb
--- /dev/null
+++ b/youtube_dl/extractor/truth.py
@@ -0,0 +1,98 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+from ..utils import (
+    clean_html,
+    ExtractorError,
+    int_or_none,
+    unified_timestamp,
+)
+
+
+class TruthIE(InfoExtractor):
+    """Extract videos from posts on Donald Trump's truthsocial.com.
+
+    Post metadata is read from the site's JSON status API; the first media
+    attachment of the post is used as the download URL.
+    """
+
+    # _match_id() looks up the named group "id", so the group must be named.
+    _VALID_URL = r"https://truthsocial\.com/@[^/]+/posts/(?P<id>[0-9]+)"
+    _TESTS = [
+        {
+            "url": "https://truthsocial.com/@realDonaldTrump/posts/108779000807761862",
+            "md5": "4a5fb1470c192e493d9efd6f19e514d3",
+            "info_dict": {
+                "id": "108779000807761862",
+                "ext": "qt",
+                "title": "Untitled",
+                "timestamp": 1659835827,
+                "upload_date": "20220807",
+                "uploader": "Donald J. Trump",
+                "uploader_id": "realDonaldTrump",
+            },
+        },
+        {
+            "url": "https://truthsocial.com/@ProjectVeritasAction/posts/108618228543962049",
+            "md5": "fd47ba68933f9dce27accc52275be9c3",
+            "info_dict": {
+                "id": "108618228543962049",
+                "ext": "mp4",
+                "title": "RETRACTO #368: Utah NPR Affiliate RETRACTS False Claim Live On Air Following Veritas' Reporting on Curtis Campaign “Nothing I ever do will suffice for these people. They are engaged in conspiracy theories. They are doing precisely the thing they project that I do. Which is they don’t believe in facts, they don’t believe in logic, and they don’t believe in rationality.” - James O’Keefe",
+                "timestamp": 1657382637,
+                "upload_date": "20220709",
+                "uploader": "Project Veritas Action",
+                "uploader_id": "ProjectVeritasAction",
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        """Return the info dict for the Truth Social post at *url*.
+
+        Raises ExtractorError if the post has no media attachments.
+        """
+        video_id = self._match_id(url)
+
+        # Get data from API
+        api_url = "https://truthsocial.com/api/v1/statuses/" + video_id
+        status = self._download_json(api_url, video_id)
+
+        # Pull out video; a text-only post has nothing to download, so fail
+        # with a readable message instead of a bare KeyError/IndexError.
+        attachments = status.get("media_attachments") or []
+        if not attachments:
+            raise ExtractorError(
+                "No media attachments found in this post", expected=True
+            )
+        video_url = attachments[0]["url"]
+
+        # Pull out metadata. clean_html() passes None through, so fall back
+        # to "" before calling .replace() on a post without "content".
+        content = clean_html(status.get("content")) or ""
+        title = content.replace("\n", "") or "Untitled"
+        account = status.get("account") or {}
+        timestamp = unified_timestamp(status.get("created_at"))
+        uploader = account.get("display_name")
+        uploader_id = account.get("username")
+        uploader_url = (
+            "https://truthsocial.com/@" + uploader_id if uploader_id else None
+        )
+        repost_count = int_or_none(status.get("reblogs_count"))
+        like_count = int_or_none(status.get("favourites_count"))
+        comment_count = int_or_none(status.get("replies_count"))
+
+        return {
+            "id": video_id,
+            "url": video_url,
+            "title": title,
+            "timestamp": timestamp,
+            "uploader": uploader,
+            "uploader_id": uploader_id,
+            "uploader_url": uploader_url,
+            "repost_count": repost_count,
+            "like_count": like_count,
+            "comment_count": comment_count,
+        }