From e0edf1e041caa32195e1d9bcb3966dae2fbc39c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= Date: Tue, 23 Nov 2010 21:20:26 -0200 Subject: [PATCH 01/69] Give preference to WebM formats. This patch gives preference to formats that are Free. --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 24722d292..cd1e9fc86 100755 --- a/youtube-dl +++ b/youtube-dl @@ -719,7 +719,7 @@ class YoutubeIE(InfoExtractor): _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' # Listed in order of quality - _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13'] + _available_formats = ['38', '37', '45', '22', '43', '35', '34', '18', '6', '5', '17', '13'] _video_extensions = { '13': '3gp', '17': 'mp4', From 92743d423a7dfaf0f803deab14475e6343091f20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= Date: Thu, 25 Nov 2010 04:24:45 -0200 Subject: [PATCH 02/69] Preliminary downloading from vimeo --- youtube-dl | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/youtube-dl b/youtube-dl index 8dd03daf3..edd1d3f29 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1718,6 +1718,118 @@ class YahooIE(InfoExtractor): self._downloader.trouble(u'\nERROR: unable to download video') +class VimeoIE(InfoExtractor): + """Information extractor for vimeo.com.""" + + # _VALID_URL matches Vimeo URLs + _VALID_URL = r'(?:http://)?vimeo\.com/([0-9]+)' + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + @staticmethod + def suitable(url): + return (re.match(VimeoIE._VALID_URL, url) is not None) + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self._downloader.to_screen(u'[video.vimeo] %s: Downloading webpage' % video_id) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_screen(u'[video.vimeo] %s: Extracting information' % video_id) + + def _real_initialize(self): + return + + def _real_extract(self, url, new_video=True): + # Extract ID from URL + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) + return + + # At this point we have a new video + self._downloader.increment_downloads() + video_id = mobj.group(1) + video_extension = 'flv' # FIXME + + # Retrieve video webpage to extract further information + request = urllib2.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id, None, std_headers) + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + + # Extract uploader and title from webpage + self.report_extraction(video_id) + mobj = re.search(r'(.*)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video title') + return + video_title = mobj.group(1).decode('utf-8') + simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + + mobj = re.search(r'http://vimeo.com/(.*)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video uploader') + return + video_uploader = mobj.group(1).decode('utf-8') + + # Extract video thumbnail + mobj = re.search(r'(.*)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video 
thumbnail') + return + video_thumbnail = mobj.group(1).decode('utf-8') + + # # Extract video description + # mobj = re.search(r'', webpage) + # if mobj is None: + # self._downloader.trouble(u'ERROR: unable to extract video description') + # return + # video_description = mobj.group(1).decode('utf-8') + # if not video_description: video_description = 'No description available.' + video_description = 'Foo.' + + # Extract request signature + mobj = re.search(r'(.*)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract request signature') + return + sig = mobj.group(1).decode('utf-8') + + # Extract request signature expiration + mobj = re.search(r'(.*)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract request signature expiration') + return + sig_exp = mobj.group(1).decode('utf-8') + + video_url = "http://vimeo.com/moogaloop/play/clip:%s/%s/%s" % (video_id, sig, sig_exp) + + try: + # Process video information + self._downloader.process_info({ + 'id': video_id.decode('utf-8'), + 'url': video_url, + 'uploader': video_uploader, + 'upload_date': u'NA', + 'title': video_title, + 'stitle': simple_title, + 'ext': video_extension.decode('utf-8'), + 'thumbnail': video_thumbnail.decode('utf-8'), + 'description': video_description, + 'thumbnail': video_thumbnail, + 'description': video_description, + 'player_url': None, + }) + except UnavailableVideoError: + self._downloader.trouble(u'ERROR: unable to download video') + + class GenericIE(InfoExtractor): """Generic last-resort information extractor.""" @@ -2537,6 +2649,7 @@ if __name__ == '__main__': parser.error(u'invalid playlist end number specified') # Information extractors + vimeo_ie = VimeoIE() youtube_ie = YoutubeIE() metacafe_ie = MetacafeIE(youtube_ie) dailymotion_ie = DailymotionIE() @@ -2588,6 +2701,7 @@ if __name__ == '__main__': 'nopart': opts.nopart, 'updatetime': opts.updatetime, }) + fd.add_info_extractor(vimeo_ie) fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie) fd.add_info_extractor(youtube_user_ie) From c5a088d341e3aeaf65fbca02523c02ff3bccee6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= Date: Sat, 29 Jan 2011 04:13:54 -0200 Subject: [PATCH 03/69] Use non-greedy regexps, for safety. Since I was very lazy when I coded this, I took the fastest route. Luckily, Vasyl' Vavrychuk pointed this out and I went (after many months) and just did some minor changes. 
--- youtube-dl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube-dl b/youtube-dl index edd1d3f29..e7459062d 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1765,21 +1765,21 @@ class VimeoIE(InfoExtractor): # Extract uploader and title from webpage self.report_extraction(video_id) - mobj = re.search(r'(.*)', webpage) + mobj = re.search(r'(.*?)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract video title') return video_title = mobj.group(1).decode('utf-8') simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) - mobj = re.search(r'http://vimeo.com/(.*)', webpage) + mobj = re.search(r'http://vimeo.com/(.*?)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract video uploader') return video_uploader = mobj.group(1).decode('utf-8') # Extract video thumbnail - mobj = re.search(r'(.*)', webpage) + mobj = re.search(r'(.*?)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract video thumbnail') return @@ -1795,14 +1795,14 @@ class VimeoIE(InfoExtractor): video_description = 'Foo.' # Extract request signature - mobj = re.search(r'(.*)', webpage) + mobj = re.search(r'(.*?)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract request signature') return sig = mobj.group(1).decode('utf-8') # Extract request signature expiration - mobj = re.search(r'(.*)', webpage) + mobj = re.search(r'(.*?)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract request signature expiration') return From f24c674b048003d878a1d6436c1b2af47693f2ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= Date: Fri, 4 Feb 2011 04:02:29 -0200 Subject: [PATCH 04/69] Make some of the comments more descriptive. --- youtube-dl | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube-dl b/youtube-dl index b96156be7..a925c9783 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1764,8 +1764,12 @@ class VimeoIE(InfoExtractor): self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) return - # Extract uploader and title from webpage + # Now we begin extracting as much information as we can from what we + # retrieved. First we extract the information common to all extractors, + # and latter we extract those that are Vimeo specific. self.report_extraction(video_id) + + # Extract title mobj = re.search(r'(.*?)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract video title') @@ -1773,6 +1777,7 @@ class VimeoIE(InfoExtractor): video_title = mobj.group(1).decode('utf-8') simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + # Extract uploader mobj = re.search(r'http://vimeo.com/(.*?)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract video uploader') @@ -1795,14 +1800,14 @@ class VimeoIE(InfoExtractor): # if not video_description: video_description = 'No description available.' video_description = 'Foo.' 
- # Extract request signature + # Vimeo specific: extract request signature mobj = re.search(r'(.*?)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract request signature') return sig = mobj.group(1).decode('utf-8') - # Extract request signature expiration + # Vimeo specific: Extract request signature expiration mobj = re.search(r'(.*?)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract request signature expiration') From 8cc98b2358fb4554c7af9dcd38fd4c96262e5ac3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= Date: Fri, 4 Feb 2011 06:15:27 -0200 Subject: [PATCH 05/69] vimeo: Also accept URLs prefixed by www. I hope that this doesn't break anything. `:)` --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index a925c9783..16d234ebf 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1723,7 +1723,7 @@ class VimeoIE(InfoExtractor): """Information extractor for vimeo.com.""" # _VALID_URL matches Vimeo URLs - _VALID_URL = r'(?:http://)?vimeo\.com/([0-9]+)' + _VALID_URL = r'(?:http://)?(?:www.)?vimeo\.com/([0-9]+)' def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) From a7e5259c33851725243b13f01929e75bb40e0ea2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= Date: Thu, 17 Feb 2011 08:25:45 -0200 Subject: [PATCH 06/69] vimeo: Make regexp more robust. This change makes the VimeoIE work with http://player.vimeo.com/video/19267888 --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 16d234ebf..780a6d9a2 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1723,7 +1723,7 @@ class VimeoIE(InfoExtractor): """Information extractor for vimeo.com.""" # _VALID_URL matches Vimeo URLs - _VALID_URL = r'(?:http://)?(?:www.)?vimeo\.com/([0-9]+)' + _VALID_URL = r'(?:http://)?(?:(?:www|player).)?vimeo\.com/(?:video/)?([0-9]+)' def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) From 0ecedbdb036120849c2a7eb992ec8a993221e5f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= Date: Wed, 20 Apr 2011 21:07:57 -0300 Subject: [PATCH 07/69] vimeo: Remove clutter in some messages. We should make a unified way of printing messages, but let's follow suit and do what the main YoutubeIE does here. --- youtube-dl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 240b2bc7b..080490ded 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1735,11 +1735,11 @@ class VimeoIE(InfoExtractor): def report_download_webpage(self, video_id): """Report webpage download.""" - self._downloader.to_screen(u'[video.vimeo] %s: Downloading webpage' % video_id) + self._downloader.to_screen(u'[vimeo] %s: Downloading webpage' % video_id) def report_extraction(self, video_id): """Report information extraction.""" - self._downloader.to_screen(u'[video.vimeo] %s: Extracting information' % video_id) + self._downloader.to_screen(u'[vimeo] %s: Extracting information' % video_id) def _real_initialize(self): return From 1e055db69ccffbacad5765887f14879bbe350ce2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= Date: Wed, 20 Apr 2011 21:15:57 -0300 Subject: [PATCH 08/69] vimeo: Ignore if we are using HTTP/S or not. 
--- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 080490ded..17fb82da7 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1724,7 +1724,7 @@ class VimeoIE(InfoExtractor): """Information extractor for vimeo.com.""" # _VALID_URL matches Vimeo URLs - _VALID_URL = r'(?:http://)?(?:(?:www|player).)?vimeo\.com/(?:video/)?([0-9]+)' + _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:video/)?([0-9]+)' def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) From 44c636df8966a1ace617b276f19b5887aa66d612 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= Date: Wed, 20 Apr 2011 21:20:55 -0300 Subject: [PATCH 09/69] vimeo: Tweak the regexp to allow some extended URLs from vimeo. This, in particular, lets me grab the videos from the beginners channel with URLs like: http://vimeo.com/groups/fivebyfive/videos/22648611 Note that the regexp *will* break for other URLs that we don't know about and that's on purpose: we don't want to accidentally grab videos that would be passed on to other information extractors. --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 17fb82da7..f3d7a3f61 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1724,7 +1724,7 @@ class VimeoIE(InfoExtractor): """Information extractor for vimeo.com.""" # _VALID_URL matches Vimeo URLs - _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:video/)?([0-9]+)' + _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)' def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) From 2fc31a48723fd4f84c20cf97f810f0171419bcf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= Date: Wed, 20 Apr 2011 21:29:29 -0300 Subject: [PATCH 10/69] vimeo: Apparently, all videos in vimeo are served in ISO containers. 
--- youtube-dl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index f3d7a3f61..b734c997c 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1754,7 +1754,6 @@ class VimeoIE(InfoExtractor): # At this point we have a new video self._downloader.increment_downloads() video_id = mobj.group(1) - video_extension = 'flv' # FIXME # Retrieve video webpage to extract further information request = urllib2.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id, None, std_headers) @@ -1826,7 +1825,7 @@ class VimeoIE(InfoExtractor): 'upload_date': u'NA', 'title': video_title, 'stitle': simple_title, - 'ext': video_extension.decode('utf-8'), + 'ext': u'mp4', 'thumbnail': video_thumbnail.decode('utf-8'), 'description': video_description, 'thumbnail': video_thumbnail, From 7745f5d88189530d2270531376c719594333c6f7 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 21 Jun 2011 22:24:58 +0200 Subject: [PATCH 11/69] Basic blip.tv support --- youtube-dl | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/youtube-dl b/youtube-dl index 3ac27a857..a6d0ce434 100755 --- a/youtube-dl +++ b/youtube-dl @@ -15,6 +15,7 @@ import email.utils import gzip import htmlentitydefs import httplib +import json # TODO: json for 2.5 import locale import math import netrc @@ -2563,6 +2564,80 @@ class FacebookIE(InfoExtractor): except UnavailableVideoError, err: self._downloader.trouble(u'\nERROR: unable to download video') +class BlipTVIE(InfoExtractor): + """Information extractor for blip.tv""" + + _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip.tv/(.+)$' + _URL_EXT = r'^.*\.([a-z0-9]+)$' + + @staticmethod + def suitable(url): + return (re.match(BlipTVIE._VALID_URL, url) is not None) + + def report_download_webpage(self, file_id): + """Report webpage download.""" + self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.service_name, file_id)) + + def report_extraction(self, file_id): + """Report information extraction.""" + self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.service_name, file_id)) + + @property + def service_name(self): + return u'blip.tv' + + def _simplify_title(self, title): + res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) + res = res.strip(ur'_') + return res + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + + json_url = url + ('&' if '?' 
in url else '?') + 'skin=json&version=2&no_wrap=1' + request = urllib2.Request(json_url) + try: + json_code = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) + return + try: + json_data = json.loads(json_code) + data = json_data['Post'] + + upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') + video_url = data['media']['url'] + umobj = re.match(self._URL_EXT, video_url) + if umobj is None: + raise ValueError('Can not determine filename extension') + ext = umobj.group(1) + + info = { + 'id': data['item_id'], + 'url': video_url, + 'uploader': data['display_name'], + 'upload_date': upload_date, + 'title': data['title'], + 'stitle': self._simplify_title(data['title']), + 'ext': ext, + 'format': data['media']['mimeType'], + 'thumbnail': data['thumbnailUrl'], + 'description': data['description'], + 'player_url': data['embedUrl'] + } + except (ValueError,KeyError), err: + self._downloader.trouble(u'ERROR: unable to parse video information: %s' % str(err)) + return + + try: + self._downloader.process_info(info) + except UnavailableVideoError, err: + self._downloader.trouble(u'\nERROR: unable to download video') + + class PostProcessor(object): """Post Processor class. @@ -2911,6 +2986,7 @@ if __name__ == '__main__': yahoo_search_ie = YahooSearchIE(yahoo_ie) deposit_files_ie = DepositFilesIE() facebook_ie = FacebookIE() + bliptv_ie = BlipTVIE() generic_ie = GenericIE() # File downloader @@ -2963,6 +3039,7 @@ if __name__ == '__main__': fd.add_info_extractor(yahoo_search_ie) fd.add_info_extractor(deposit_files_ie) fd.add_info_extractor(facebook_ie) + fd.add_info_extractor(bliptv_ie) # This must come last since it's the # fallback if none of the others work From aded78d9e213803ff5ab2cdf429c2f8578482194 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 25 Jun 2011 19:26:29 +0200 Subject: [PATCH 12/69] Support for blip.tv/file URLs --- youtube-dl | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/youtube-dl b/youtube-dl index a6d0ce434..fbb0389be 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2567,24 +2567,16 @@ class FacebookIE(InfoExtractor): class BlipTVIE(InfoExtractor): """Information extractor for blip.tv""" - _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip.tv/(.+)$' + _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip.tv(/.+)$' _URL_EXT = r'^.*\.([a-z0-9]+)$' @staticmethod def suitable(url): return (re.match(BlipTVIE._VALID_URL, url) is not None) - def report_download_webpage(self, file_id): - """Report webpage download.""" - self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.service_name, file_id)) - def report_extraction(self, file_id): """Report information extraction.""" - self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.service_name, file_id)) - - @property - def service_name(self): - return u'blip.tv' + self._downloader.to_screen(u'[blip.tv] %s: Extracting information' % file_id) def _simplify_title(self, title): res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) @@ -2599,6 +2591,7 @@ class BlipTVIE(InfoExtractor): json_url = url + ('&' if '?' 
in url else '?') + 'skin=json&version=2&no_wrap=1' request = urllib2.Request(json_url) + self.report_extraction(mobj.group(1)) try: json_code = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: @@ -2606,7 +2599,7 @@ class BlipTVIE(InfoExtractor): return try: json_data = json.loads(json_code) - data = json_data['Post'] + data = json_data['Post'] if 'Post' in json_data else json_data upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') video_url = data['media']['url'] @@ -2629,7 +2622,7 @@ class BlipTVIE(InfoExtractor): 'player_url': data['embedUrl'] } except (ValueError,KeyError), err: - self._downloader.trouble(u'ERROR: unable to parse video information: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err)) return try: From c6b55a8d4817a0818a1923db72b0f953ab80c0d4 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 7 Jul 2011 12:12:20 +0200 Subject: [PATCH 13/69] Full youtube video descriptions, including special characters (2.6+, with fallback for older Pythons) --- youtube-dl | 38 ++++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/youtube-dl b/youtube-dl index fbb0389be..a3522199f 100755 --- a/youtube-dl +++ b/youtube-dl @@ -15,7 +15,6 @@ import email.utils import gzip import htmlentitydefs import httplib -import json # TODO: json for 2.5 import locale import math import netrc @@ -24,20 +23,35 @@ import os.path import re import socket import string -import StringIO import subprocess import sys import time import urllib import urllib2 +import warnings import zlib +try: + import json +except ImportError: + warnings.warn('No JSON support (TODO: insert trivialjson here)') + +try: + import cStringIO as StringIO +except ImportError: + import StringIO + # parse_qs was moved from the cgi module to the urlparse module recently. try: from urlparse import parse_qs except ImportError: from cgi import parse_qs +try: + import lxml.etree +except ImportError: # Python < 2.6 + pass # Handled below + std_headers = { 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0b11) Gecko/20100101 Firefox/4.0b11', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', @@ -1068,11 +1082,19 @@ class YoutubeIE(InfoExtractor): pass # description - video_description = 'No description available.' - if self._downloader.params.get('forcedescription', False): - mobj = re.search(r'', video_webpage) - if mobj is not None: - video_description = mobj.group(1) + try: + lxml.etree + except NameError: + video_description = u'No description available.' + if self._downloader.params.get('forcedescription', False): + warnings.warn(u'You are using an old Python version, install Python 2.6+ or lxml. 
Falling back to old video description extractor.') + mobj = re.search(r'', video_webpage) + if mobj is not None: + video_description = mobj.group(1).decode('utf-8') + else: + html_parser = lxml.etree.HTMLParser(encoding='utf-8') + vwebpage_doc = lxml.etree.parse(StringIO.StringIO(video_webpage), html_parser) + video_description = u''.join(vwebpage_doc.xpath('id("eow-description")//text()')) # token video_token = urllib.unquote_plus(video_info['token'][0]) @@ -1130,7 +1152,7 @@ class YoutubeIE(InfoExtractor): 'ext': video_extension.decode('utf-8'), 'format': (format_param is None and u'NA' or format_param.decode('utf-8')), 'thumbnail': video_thumbnail.decode('utf-8'), - 'description': video_description.decode('utf-8'), + 'description': video_description, 'player_url': player_url, }) except UnavailableVideoError, err: From 8b95c38707b8e6c9f2ce6a754d77b2b8f458cc14 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 7 Jul 2011 12:47:36 +0200 Subject: [PATCH 14/69] --writedescription option --- youtube-dl | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/youtube-dl b/youtube-dl index a3522199f..bb0f90a9c 100755 --- a/youtube-dl +++ b/youtube-dl @@ -8,6 +8,10 @@ # Author: Paweł Paprota # Author: Gergely Imreh # License: Public domain code + +from __future__ import with_statement + +import contextlib import cookielib import ctypes import datetime @@ -301,6 +305,7 @@ class FileDownloader(object): consoletitle: Display progress in console window's titlebar. nopart: Do not use temporary .part files. updatetime: Use the Last-modified header to set output file timestamps. + writedescription: Write the video description to a .description file """ params = None @@ -496,6 +501,10 @@ class FileDownloader(object): except: pass + def report_writedescription(self, descfn): + """ Report that the description file has been written """ + self.to_screen(u'[info] Video description written to: %s' % descfn, ignore_encoding_errors=True) + def report_destination(self, filename): """Report destination filename.""" self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True) @@ -582,6 +591,16 @@ class FileDownloader(object): self.trouble(u'ERROR: unable to create directories: %s' % str(err)) return + if self.params.get('writedescription', False): + try: + descfn = filename + '.description' + with contextlib.closing(open(descfn, 'wb')) as descfile: + descfile.write(info_dict['description'].encode('utf-8')) + self.report_writedescription(descfn) + except (OSError, IOError): + self.trouble(u'ERROR: Cannot write description file: %s' % str(descfn)) + return + try: success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None)) except (OSError, IOError), err: @@ -1086,7 +1105,7 @@ class YoutubeIE(InfoExtractor): lxml.etree except NameError: video_description = u'No description available.' - if self._downloader.params.get('forcedescription', False): + if self._downloader.params.get('forcedescription', False) or self._downloader.params.get('writedescription', False): warnings.warn(u'You are using an old Python version, install Python 2.6+ or lxml. Falling back to old video description extractor.') mobj = re.search(r'', video_webpage) if mobj is not None: @@ -2529,10 +2548,7 @@ class FacebookIE(InfoExtractor): pass # description - video_description = 'No description available.' 
- if (self._downloader.params.get('forcedescription', False) and - 'description' in video_info): - video_description = video_info['description'] + video_description = video_info.get('description', 'No description available.') url_map = video_info['video_urls'] if len(url_map.keys()) > 0: @@ -2903,6 +2919,9 @@ if __name__ == '__main__': filesystem.add_option('--no-mtime', action='store_false', dest='updatetime', help='do not use the Last-modified header to set the file modification time', default=True) + filesystem.add_option('--write-description', + action='store_true', dest='writedescription', + help='write video description to a .description file', default=False) parser.add_option_group(filesystem) postproc = optparse.OptionGroup(parser, 'Post-processing Options') @@ -3040,6 +3059,7 @@ if __name__ == '__main__': 'consoletitle': opts.consoletitle, 'nopart': opts.nopart, 'updatetime': opts.updatetime, + 'writedescription': opts.writedescription, }) fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie) From a1cab7cead8554913ba3d0362b3d6fb11c8c7f90 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 7 Jul 2011 14:10:25 +0200 Subject: [PATCH 15/69] call increment_downloads in blip.tv extractor --- youtube-dl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube-dl b/youtube-dl index bb0f90a9c..466802434 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2646,6 +2646,8 @@ class BlipTVIE(InfoExtractor): raise ValueError('Can not determine filename extension') ext = umobj.group(1) + self._downloader.increment_downloads() + info = { 'id': data['item_id'], 'url': video_url, From 2152ee8601b7abe0481217f5de4950f268e24d08 Mon Sep 17 00:00:00 2001 From: Giovanni Visentini Date: Sat, 9 Jul 2011 14:05:36 +0000 Subject: [PATCH 16/69] Update youtube playlist for use playlist?list=id format --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 3ac27a857..96993f536 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2097,7 +2097,7 @@ class YahooSearchIE(InfoExtractor): class YoutubePlaylistIE(InfoExtractor): """Information Extractor for YouTube playlists.""" - _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist)\?.*?(p|a)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*' + _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*' _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en' _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*' From 437d76c19a98dcc965fdf0e92356f54e5569a565 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 10 Jul 2011 17:31:54 +0200 Subject: [PATCH 17/69] blip.tv support for python 2.5 with trivialjson --- youtube-dl | 123 +++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 115 insertions(+), 8 deletions(-) diff --git a/youtube-dl b/youtube-dl index 466802434..64c1f30cd 100755 --- a/youtube-dl +++ b/youtube-dl @@ -7,10 +7,9 @@ # Author: Witold Baryluk # Author: Paweł Paprota # Author: Gergely Imreh +# Author: Philipp Hagemeister # License: Public domain code - from __future__ import with_statement - import contextlib import cookielib import ctypes @@ -35,11 +34,6 @@ import urllib2 import warnings import zlib -try: - import json -except ImportError: - warnings.warn('No JSON support (TODO: insert trivialjson here)') - try: import 
cStringIO as StringIO except ImportError: @@ -66,6 +60,119 @@ std_headers = { simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii') +try: + import json +except ImportError: # Python <2.5, use trivialjson (https://github.com/phihag/trivialjson): + import re + class json(object): + @staticmethod + def loads(s): + s = s.decode('UTF-8') + def raiseError(msg, i): + raise ValueError(msg + ' at position ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:])) + def skipSpace(i, expectMore=True): + while i < len(s) and s[i] in ' \t\r\n': + i += 1 + if expectMore: + if i >= len(s): + raiseError('Premature end', i) + return i + def decodeEscape(match): + esc = match.group(1) + _STATIC = { + '"': '"', + '\\': '\\', + '/': '/', + 'b': unichr(0x8), + 'f': unichr(0xc), + 'n': '\n', + 'r': '\r', + 't': '\t', + } + if esc in _STATIC: + return _STATIC[esc] + if esc[0] == 'u': + if len(esc) == 1+4: + return unichr(int(esc[1:5], 16)) + if len(esc) == 5+6 and esc[5:7] == '\\u': + hi = int(esc[1:5], 16) + low = int(esc[7:11], 16) + return unichr((hi - 0xd800) * 0x400 + low - 0xdc00 + 0x10000) + raise ValueError('Unknown escape ' + str(esc)) + def parseString(i): + i += 1 + e = i + while True: + e = s.index('"', e) + bslashes = 0 + while s[e-bslashes-1] == '\\': + bslashes += 1 + if bslashes % 2 == 1: + e += 1 + continue + break + rexp = re.compile(r'\\(u[dD][89aAbB][0-9a-fA-F]{2}\\u[0-9a-fA-F]{4}|u[0-9a-fA-F]{4}|.|$)') + stri = rexp.sub(decodeEscape, s[i:e]) + return (e+1,stri) + def parseObj(i): + i += 1 + res = {} + i = skipSpace(i) + if s[i] == '}': # Empty dictionary + return (i+1,res) + while True: + if s[i] != '"': + raiseError('Expected a string object key', i) + i,key = parseString(i) + i = skipSpace(i) + if i >= len(s) or s[i] != ':': + raiseError('Expected a colon', i) + i,val = parse(i+1) + res[key] = val + i = skipSpace(i) + if s[i] == '}': + return (i+1, res) + if s[i] != ',': + raiseError('Expected comma or closing curly brace', i) + i = skipSpace(i+1) + def parseArray(i): + res = [] + i = skipSpace(i+1) + if s[i] == ']': # Empty array + return (i+1,res) + while True: + i,val = parse(i) + res.append(val) + i = skipSpace(i) # Raise exception if premature end + if s[i] == ']': + return (i+1, res) + if s[i] != ',': + raiseError('Expected a comma or closing bracket', i) + i = skipSpace(i+1) + def parseDiscrete(i): + for k,v in {'true': True, 'false': False, 'null': None}.items(): + if s.startswith(k, i): + return (i+len(k), v) + raiseError('Not a boolean (or null)', i) + def parseNumber(i): + mobj = re.match('^(-?(0|[1-9][0-9]*)(\.[0-9]*)?([eE][+-]?[0-9]+)?)', s[i:]) + if mobj is None: + raiseError('Not a number', i) + nums = mobj.group(1) + if '.' in nums or 'e' in nums or 'E' in nums: + return (i+len(nums), float(nums)) + return (i+len(nums), int(nums)) + CHARMAP = {'{': parseObj, '[': parseArray, '"': parseString, 't': parseDiscrete, 'f': parseDiscrete, 'n': parseDiscrete} + def parse(i): + i = skipSpace(i) + i,res = CHARMAP.get(s[i], parseNumber)(i) + i = skipSpace(i, False) + return (i,res) + i,res = parse(0) + if i < len(s): + raise ValueError('Extra data at end of input (index ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]) + ')') + return res + def preferredencoding(): """Get preferred encoding. 
@@ -2831,7 +2938,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2011.03.29', + version='2011.07.09-phihag', conflict_handler='resolve', ) From 6eb08fbf8b02452015ad1e09b4c5e8c1d5aa2bfe Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 10 Jul 2011 21:39:36 +0200 Subject: [PATCH 18/69] + --write-info-json --- youtube-dl | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/youtube-dl b/youtube-dl index 64c1f30cd..578f473b8 100755 --- a/youtube-dl +++ b/youtube-dl @@ -413,6 +413,7 @@ class FileDownloader(object): nopart: Do not use temporary .part files. updatetime: Use the Last-modified header to set output file timestamps. writedescription: Write the video description to a .description file + writeinfojson: Write the video description to a .info.json file """ params = None @@ -609,8 +610,12 @@ class FileDownloader(object): pass def report_writedescription(self, descfn): - """ Report that the description file has been written """ - self.to_screen(u'[info] Video description written to: %s' % descfn, ignore_encoding_errors=True) + """ Report that the description file is being written """ + self.to_screen(u'[info] Writing video description to: %s' % descfn, ignore_encoding_errors=True) + + def report_writeinfojson(self, infofn): + """ Report that the metadata file has been written """ + self.to_screen(u'[info] Video description metadata as JSON to: %s' % infofn, ignore_encoding_errors=True) def report_destination(self, filename): """Report destination filename.""" @@ -701,13 +706,29 @@ class FileDownloader(object): if self.params.get('writedescription', False): try: descfn = filename + '.description' + self.report_writedescription(descfn) with contextlib.closing(open(descfn, 'wb')) as descfile: descfile.write(info_dict['description'].encode('utf-8')) - self.report_writedescription(descfn) except (OSError, IOError): self.trouble(u'ERROR: Cannot write description file: %s' % str(descfn)) return + print(repr(self.params)) + if self.params.get('writeinfojson', False): + infofn = filename + '.info.json' + self.report_writeinfojson(infofn) + try: + json.dump + except (NameError,AttributeError): + self.trouble(u'ERROR: No JSON encoder found. 
Update to Python 2.6+, setup a json module, or leave out --write-info-json.') + return + try: + with contextlib.closing(open(infofn, 'wb')) as infof: + json.dump(info_dict, infof) + except (OSError, IOError): + self.trouble(u'ERROR: Cannot write metadata to JSON file: %s' % str(infofn)) + return + try: success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None)) except (OSError, IOError), err: @@ -3031,6 +3052,9 @@ if __name__ == '__main__': filesystem.add_option('--write-description', action='store_true', dest='writedescription', help='write video description to a .description file', default=False) + filesystem.add_option('--write-info-json', + action='store_true', dest='writeinfojson', + help='write video metadata to a .info.json file', default=False) parser.add_option_group(filesystem) postproc = optparse.OptionGroup(parser, 'Post-processing Options') @@ -3169,6 +3193,7 @@ if __name__ == '__main__': 'nopart': opts.nopart, 'updatetime': opts.updatetime, 'writedescription': opts.writedescription, + 'writeinfojson': opts.writeinfojson, }) fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie) From 5623100e43b12413790c4017eafb3412e29e517f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 10 Jul 2011 23:41:19 +0200 Subject: [PATCH 19/69] remove debugging code --- youtube-dl | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 578f473b8..762bfc3ef 100755 --- a/youtube-dl +++ b/youtube-dl @@ -713,7 +713,6 @@ class FileDownloader(object): self.trouble(u'ERROR: Cannot write description file: %s' % str(descfn)) return - print(repr(self.params)) if self.params.get('writeinfojson', False): infofn = filename + '.info.json' self.report_writeinfojson(infofn) From 91e6a3855b4d76cefe7415ea9a1bafd98ad6bece Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 18 Jul 2011 19:43:21 +0200 Subject: [PATCH 20/69] Be lenient about download URLs (Closes #108) --- youtube-dl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 762bfc3ef..07904604c 100755 --- a/youtube-dl +++ b/youtube-dl @@ -62,7 +62,7 @@ simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode try: import json -except ImportError: # Python <2.5, use trivialjson (https://github.com/phihag/trivialjson): +except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson): import re class json(object): @staticmethod @@ -1241,6 +1241,7 @@ class YoutubeIE(InfoExtractor): html_parser = lxml.etree.HTMLParser(encoding='utf-8') vwebpage_doc = lxml.etree.parse(StringIO.StringIO(video_webpage), html_parser) video_description = u''.join(vwebpage_doc.xpath('id("eow-description")//text()')) + # TODO use another parser # token video_token = urllib.unquote_plus(video_info['token'][0]) @@ -1248,7 +1249,7 @@ class YoutubeIE(InfoExtractor): # Decide which formats to download req_format = self._downloader.params.get('format', None) - if 'fmt_url_map' in video_info and len(video_info['fmt_url_map']) >= 1 and ',' in video_info['fmt_url_map'][0]: + if 'fmt_url_map' in video_info and len(video_info['fmt_url_map']) >= 1: url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(',')) format_limit = self._downloader.params.get('format_limit', None) if format_limit is not None and format_limit in self._available_formats: From 47b8dab29e8930a752533e74a87befd52ca4a2a8 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 22 Jul 2011 
15:28:42 +0200 Subject: [PATCH 21/69] Removed inaccurate warning --- youtube-dl | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 07904604c..17a2da2ef 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1233,7 +1233,6 @@ class YoutubeIE(InfoExtractor): except NameError: video_description = u'No description available.' if self._downloader.params.get('forcedescription', False) or self._downloader.params.get('writedescription', False): - warnings.warn(u'You are using an old Python version, install Python 2.6+ or lxml. Falling back to old video description extractor.') mobj = re.search(r'', video_webpage) if mobj is not None: video_description = mobj.group(1).decode('utf-8') From c31b124d7a2c754f3ca5c6f8de8c501cfbad895a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 31 Jul 2011 18:09:53 +0200 Subject: [PATCH 22/69] Suppport for youtube video streams (Mentioned in #108) --- youtube-dl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 17a2da2ef..27ae816e0 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1248,8 +1248,14 @@ class YoutubeIE(InfoExtractor): # Decide which formats to download req_format = self._downloader.params.get('format', None) + raw_map = None if 'fmt_url_map' in video_info and len(video_info['fmt_url_map']) >= 1: - url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(',')) + raw_map = video_info['fmt_url_map'][0] + elif 'fmt_stream_map' in video_info and len(video_info['fmt_stream_map']) >= 1: + raw_map = video_info['fmt_stream_map'][0] + + if raw_map is not None: + url_map = dict(tuple(pair.split('|')[:2]) for pair in raw_map.split(',')) format_limit = self._downloader.params.get('format_limit', None) if format_limit is not None and format_limit in self._available_formats: format_list = self._available_formats[self._available_formats.index(format_limit):] From 0ac22e4f5a652f1b470f9daff06c1361e8f93c16 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 4 Aug 2011 00:04:55 +0200 Subject: [PATCH 23/69] Fix youtube downloads (Closes #135) --- youtube-dl | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/youtube-dl b/youtube-dl index 27ae816e0..0d77585a8 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1248,14 +1248,11 @@ class YoutubeIE(InfoExtractor): # Decide which formats to download req_format = self._downloader.params.get('format', None) - raw_map = None - if 'fmt_url_map' in video_info and len(video_info['fmt_url_map']) >= 1: - raw_map = video_info['fmt_url_map'][0] - elif 'fmt_stream_map' in video_info and len(video_info['fmt_stream_map']) >= 1: - raw_map = video_info['fmt_stream_map'][0] - - if raw_map is not None: - url_map = dict(tuple(pair.split('|')[:2]) for pair in raw_map.split(',')) + if 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1: + url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',') + url_data = [dict(pairStr.split('=') for pairStr in uds.split('&')) for uds in url_data_strs] + url_map = dict((ud['itag'], urllib.unquote(ud['url'])) for ud in url_data) + format_limit = self._downloader.params.get('format_limit', None) if format_limit is not None and format_limit in self._available_formats: format_list = self._available_formats[self._available_formats.index(format_limit):] From 6bcd846b5212b41877b80358125b2210432e080d Mon Sep 17 00:00:00 2001 From: Daniel Bolton Date: Fri, 5 Aug 2011 19:14:13 -0400 Subject: [PATCH 24/69] Add README.md (markdown 
file) --- README.md | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 000000000..7fc1267c2 --- /dev/null +++ b/README.md @@ -0,0 +1,88 @@ +# youtube-dl + +## USAGE +*** +youtube-dl [OPTIONS] URL + +## DESCRIPTION +*** +**youtube-dl** is a small command-line program to download videos from +YouTube.com and a few more sites. It requires the Python interpreter, version +2.x (x being at least 5), and it is not platform specific. It should work in +your Unix box, in Windows or in Mac OS X. It is released to the public domain, +which means you can modify it, redistribute it or use it however you like. + +## OPTIONS +*** + -h, --help print this help text and exit + -v, --version print program version and exit + -U, --update update this program to latest stable version + -i, --ignore-errors continue on download errors + -r LIMIT, --rate-limit=LIMIT + download rate limit (e.g. 50k or 44.6m) + -R RETRIES, --retries=RETRIES + number of retries (default is 10) + --playlist-start=NUMBER + playlist video to start at (default is 1) + --playlist-end=NUMBER + playlist video to end at (default is last) + --dump-user-agent display the current browser identification + +### Authentication Options + -u USERNAME, --username=USERNAME + account username + -p PASSWORD, --password=PASSWORD + account password + -n, --netrc use .netrc authentication data + +### Video Format Options + -f FORMAT, --format=FORMAT + video format code + --all-formats download all available video formats + --max-quality=FORMAT + highest quality format to download + +### Verbosity / Simulation Options + -q, --quiet activates quiet mode + -s, --simulate do not download video + -g, --get-url simulate, quiet but print URL + -e, --get-title simulate, quiet but print title + --get-thumbnail simulate, quiet but print thumbnail URL + --get-description simulate, quiet but print video description + --get-filename simulate, quiet but print output filename + --no-progress do not print progress bar + --console-title display progress in console titlebar + +### Filesystem Options + -t, --title use title in file name + -l, --literal use literal title in file name + -A, --auto-number number downloaded files starting from 00000 + -o TEMPLATE, --output=TEMPLATE + output filename template + -a FILE, --batch-file=FILE + file containing URLs to download ('-' for stdin) + -w, --no-overwrites + do not overwrite files + -c, --continue resume partially downloaded files + --cookies=FILE file to dump cookie jar to + --no-part do not use .part files + --no-mtime do not use the Last-modified header to set the file + modification time + +### Post-processing Options: + --extract-audio convert video files to audio-only files (requires + ffmpeg and ffprobe) + --audio-format=FORMAT + "best", "aac" or "mp3"; best by default + +## COPYRIGHT +*** +**youtube-dl**: Copyright © 2006-2011 Ricardo Garcia Gonzalez. The program is +released into the public domain by the copyright holder. This README file was +originally written by Daniel Bolton () and is +likewise released into the public domain. 
+ +## BUGS +*** +Bugs should be reported at: + From 2bf94b311659afe0e3aeb7149edcbb192f8093e7 Mon Sep 17 00:00:00 2001 From: Daniel Bolton Date: Fri, 5 Aug 2011 19:15:57 -0400 Subject: [PATCH 25/69] Remove horizontal rules from README.md --- README.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/README.md b/README.md index 7fc1267c2..cf38e31fc 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,9 @@ # youtube-dl ## USAGE -*** youtube-dl [OPTIONS] URL ## DESCRIPTION -*** **youtube-dl** is a small command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.x (x being at least 5), and it is not platform specific. It should work in @@ -13,7 +11,6 @@ your Unix box, in Windows or in Mac OS X. It is released to the public domain, which means you can modify it, redistribute it or use it however you like. ## OPTIONS -*** -h, --help print this help text and exit -v, --version print program version and exit -U, --update update this program to latest stable version @@ -76,13 +73,11 @@ which means you can modify it, redistribute it or use it however you like. "best", "aac" or "mp3"; best by default ## COPYRIGHT -*** **youtube-dl**: Copyright © 2006-2011 Ricardo Garcia Gonzalez. The program is released into the public domain by the copyright holder. This README file was originally written by Daniel Bolton () and is likewise released into the public domain. ## BUGS -*** Bugs should be reported at: From f137bef973729bf1da0f8dfe244d5ff24cb2ad23 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 6 Aug 2011 11:05:57 +0200 Subject: [PATCH 26/69] Fix RTMP streams and ignore url-less entries --- youtube-dl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube-dl b/youtube-dl index 0d77585a8..76d41c448 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1248,9 +1248,14 @@ class YoutubeIE(InfoExtractor): # Decide which formats to download req_format = self._downloader.params.get('format', None) - if 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1: + if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): + self.report_rtmp_download() + video_url_list = [(None, video_info['conn'][0])] + print(repr(video_info['conn'][0])) + elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1: url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',') url_data = [dict(pairStr.split('=') for pairStr in uds.split('&')) for uds in url_data_strs] + url_data = filter(lambda ud: 'itag' in ud and 'url' in ud, url_data) url_map = dict((ud['itag'], urllib.unquote(ud['url'])) for ud in url_data) format_limit = self._downloader.params.get('format_limit', None) @@ -1272,11 +1277,6 @@ class YoutubeIE(InfoExtractor): self._downloader.trouble(u'ERROR: requested format not available') return video_url_list = [(req_format, url_map[req_format])] # Specific format - - elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): - self.report_rtmp_download() - video_url_list = [(None, video_info['conn'][0])] - else: self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info') return From 134cff47abe76b180d9a6cd9e9afc364eb52f332 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 6 Aug 2011 11:20:28 +0200 Subject: [PATCH 27/69] Remove debugging information --- youtube-dl | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 76d41c448..3b06908c9 
100755 --- a/youtube-dl +++ b/youtube-dl @@ -1251,7 +1251,6 @@ class YoutubeIE(InfoExtractor): if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): self.report_rtmp_download() video_url_list = [(None, video_info['conn'][0])] - print(repr(video_info['conn'][0])) elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1: url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',') url_data = [dict(pairStr.split('=') for pairStr in uds.split('&')) for uds in url_data_strs] From 0a3c8b6291bb9750115f5188c8500e624c5ab449 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 6 Aug 2011 11:47:53 +0200 Subject: [PATCH 28/69] Use alternative imports for Python 2.4 (Closes #138) --- youtube-dl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 3b06908c9..9f391db0d 100755 --- a/youtube-dl +++ b/youtube-dl @@ -12,9 +12,7 @@ from __future__ import with_statement import contextlib import cookielib -import ctypes import datetime -import email.utils import gzip import htmlentitydefs import httplib @@ -34,6 +32,13 @@ import urllib2 import warnings import zlib +if os.name == 'nt': + import ctypes + +try: + import email.utils +except ImportError: # Python 2.4 + import email.Utils try: import cStringIO as StringIO except ImportError: From 1293ce58acc898cf8b423c93b45f227c26ee9f96 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 6 Aug 2011 12:16:07 +0200 Subject: [PATCH 29/69] Fix Python 2.4 compatibility --- youtube-dl | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/youtube-dl b/youtube-dl index 9f391db0d..81dd4b83b 100755 --- a/youtube-dl +++ b/youtube-dl @@ -9,8 +9,6 @@ # Author: Gergely Imreh # Author: Philipp Hagemeister # License: Public domain code -from __future__ import with_statement -import contextlib import cookielib import datetime import gzip @@ -712,8 +710,11 @@ class FileDownloader(object): try: descfn = filename + '.description' self.report_writedescription(descfn) - with contextlib.closing(open(descfn, 'wb')) as descfile: + descfile = open(descfn, 'wb') + try: descfile.write(info_dict['description'].encode('utf-8')) + finally: + descfile.close() except (OSError, IOError): self.trouble(u'ERROR: Cannot write description file: %s' % str(descfn)) return @@ -727,8 +728,11 @@ class FileDownloader(object): self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.') return try: - with contextlib.closing(open(infofn, 'wb')) as infof: + infof = open(infofn, 'wb') + try: json.dump(info_dict, infof) + finally: + infof.close() except (OSError, IOError): self.trouble(u'ERROR: Cannot write metadata to JSON file: %s' % str(infofn)) return @@ -2761,7 +2765,11 @@ class BlipTVIE(InfoExtractor): self._downloader.trouble(u'ERROR: invalid URL: %s' % url) return - json_url = url + ('&' if '?' in url else '?') + 'skin=json&version=2&no_wrap=1' + if '?' in url: + cchar = '&' + else: + cchar = '?' 
+ json_url = url + cchar + 'skin=json&version=2&no_wrap=1' request = urllib2.Request(json_url) self.report_extraction(mobj.group(1)) try: @@ -2771,7 +2779,10 @@ class BlipTVIE(InfoExtractor): return try: json_data = json.loads(json_code) - data = json_data['Post'] if 'Post' in json_data else json_data + if 'Post' in json_data: + data = json_data['Post'] + else: + data = json_data upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') video_url = data['media']['url'] From f3dc18d874eb4fd5f6f9099f1e1a13bf8548c1ff Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 7 Aug 2011 00:02:50 +0200 Subject: [PATCH 30/69] youtube: Better error messages --- youtube-dl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 81dd4b83b..c334dee5e 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1286,7 +1286,7 @@ class YoutubeIE(InfoExtractor): return video_url_list = [(req_format, url_map[req_format])] # Specific format else: - self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info') + self._downloader.trouble(u'ERROR: no conn or url_encoded_fmt_stream_map information found in video info') return for format_param, video_real_url in video_url_list: @@ -1296,7 +1296,6 @@ class YoutubeIE(InfoExtractor): # Extension video_extension = self._video_extensions.get(format_param, 'flv') - # Find the video URL in fmt_url_map or conn paramters try: # Process video information self._downloader.process_info({ From 8519c32d25e67efc77e74440f42beac6af7d1204 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 7 Aug 2011 00:29:25 +0200 Subject: [PATCH 31/69] Use parse_qs instead of homebrewn parsing --- youtube-dl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube-dl b/youtube-dl index c334dee5e..a809759ab 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1262,9 +1262,9 @@ class YoutubeIE(InfoExtractor): video_url_list = [(None, video_info['conn'][0])] elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1: url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',') - url_data = [dict(pairStr.split('=') for pairStr in uds.split('&')) for uds in url_data_strs] + url_data = [parse_qs(uds) for uds in url_data_strs] url_data = filter(lambda ud: 'itag' in ud and 'url' in ud, url_data) - url_map = dict((ud['itag'], urllib.unquote(ud['url'])) for ud in url_data) + url_map = dict((ud['itag'][0], ud['url'][0]) for ud in url_data) format_limit = self._downloader.params.get('format_limit', None) if format_limit is not None and format_limit in self._available_formats: @@ -2720,7 +2720,6 @@ class FacebookIE(InfoExtractor): # Extension video_extension = self._video_extensions.get(format_param, 'mp4') - # Find the video URL in fmt_url_map or conn paramters try: # Process video information self._downloader.process_info({ From 86e709d3dee1c0ac6f21edbd11ba92c026bef7bb Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 7 Aug 2011 13:01:09 +0200 Subject: [PATCH 32/69] Fix youtu.be links (Closes #142) --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index a809759ab..2a1908d3d 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1028,7 +1028,7 @@ class InfoExtractor(object): class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" - _VALID_URL = 
r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$' + _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$' _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' From 1cab2c6dcf6fae416e08eea368f296b249b2c4bb Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 18 Aug 2011 09:31:36 +0200 Subject: [PATCH 33/69] Fix blip.tv regular expression to not match blipXtv --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 2a1908d3d..6a4209222 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2741,7 +2741,7 @@ class FacebookIE(InfoExtractor): class BlipTVIE(InfoExtractor): """Information extractor for blip.tv""" - _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip.tv(/.+)$' + _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$' _URL_EXT = r'^.*\.([a-z0-9]+)$' @staticmethod From e9cb9c2811ab26ef54cf410b0edc52ae56a5158e Mon Sep 17 00:00:00 2001 From: Georgi Valkov Date: Tue, 23 Aug 2011 14:45:26 +0300 Subject: [PATCH 34/69] Add vim modeline --- youtube-dl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube-dl b/youtube-dl index e8b19c8d0..63051bb33 100755 --- a/youtube-dl +++ b/youtube-dl @@ -3001,3 +3001,5 @@ if __name__ == '__main__': sys.exit(u'ERROR: fixed output name but more than one file to download') except KeyboardInterrupt: sys.exit(u'\nERROR: Interrupted by user') + +# vim: set ts=4 sw=4 sts=4 noet ai si filetype=python: From 2770590d5a4f2cac5eb11c0b9479e51cafbe0709 Mon Sep 17 00:00:00 2001 From: Georgi Valkov Date: Tue, 23 Aug 2011 14:58:22 +0300 Subject: [PATCH 35/69] Use module metadata variables instead of comments --- youtube-dl | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/youtube-dl b/youtube-dl index 63051bb33..69ff8d9f4 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1,13 +1,18 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# Author: Ricardo Garcia Gonzalez -# Author: Danny Colligan -# Author: Benjamin Johnson -# Author: Vasyl' Vavrychuk -# Author: Witold Baryluk -# Author: Paweł Paprota -# Author: Gergely Imreh -# License: Public domain code + +__author__ = ( + "Ricardo Garcia Gonzalez", + "Danny Colligan", + "Benjamin Johnson", + "Vasyl' Vavrychuk", + "Witold Baryluk", + "Paweł Paprota", + "Gergely Imreh", + ) + +__license__ = "Public Domain" + import cookielib import ctypes import datetime From 7a9054ec79febd8befb65dada2899228f642d0a3 Mon Sep 17 00:00:00 2001 From: Georgi Valkov Date: Tue, 23 Aug 2011 15:01:51 +0300 Subject: [PATCH 36/69] Fix small indentation inconsistencies --- youtube-dl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube-dl b/youtube-dl index 69ff8d9f4..d64ec8134 100755 --- a/youtube-dl +++ b/youtube-dl @@ -711,7 +711,7 @@ class FileDownloader(object): else: # Examine the reported length if (content_length is not None and - (resume_len - 100 < long(content_length) < resume_len + 100)): + (resume_len - 100 < long(content_length) < resume_len + 100)): # The file had already been fully downloaded. 
# Explanation to the above condition: in issue #175 it was revealed that # YouTube sometimes adds or removes a few bytes from the end of the file, @@ -1690,8 +1690,8 @@ class YahooIE(InfoExtractor): yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents yv_bitrate = '700' # according to Wikipedia this is hard-coded request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id + - '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height + - '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797') + '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height + + '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797') try: self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() @@ -2260,9 +2260,9 @@ class YoutubeUserIE(InfoExtractor): video_ids = video_ids[playliststart:] else: video_ids = video_ids[playliststart:playlistend] - + self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" % - (username, all_ids_count, len(video_ids))) + (username, all_ids_count, len(video_ids))) for video_id in video_ids: self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id) @@ -2515,7 +2515,7 @@ class FacebookIE(InfoExtractor): # description video_description = 'No description available.' if (self._downloader.params.get('forcedescription', False) and - 'description' in video_info): + 'description' in video_info): video_description = video_info['description'] url_map = video_info['video_urls'] From 5fb3df4aff7589a6a346578affd0810d079c89c1 Mon Sep 17 00:00:00 2001 From: Georgi Valkov Date: Tue, 23 Aug 2011 15:37:35 +0300 Subject: [PATCH 37/69] Move update_self out of __main__ for clarity --- youtube-dl | 51 ++++++++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/youtube-dl b/youtube-dl index d64ec8134..fe1e6b021 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2698,34 +2698,39 @@ class FFmpegExtractAudioPP(PostProcessor): information['filepath'] = new_path return information -### MAIN PROGRAM ### + +def updateSelf(downloader, filename): + ''' Update the program file with the latest version from the repository ''' + # Note: downloader only used for options + if not os.access(filename, os.W_OK): + sys.exit('ERROR: no write permissions on %s' % filename) + + downloader.to_screen('Updating to latest stable version...') + + try: + latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION' + latest_version = urllib.urlopen(latest_url).read().strip() + prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version + newcontent = urllib.urlopen(prog_url).read() + except (IOError, OSError), err: + sys.exit('ERROR: unable to download latest version') + + try: + stream = open(filename, 'w') + stream.write(newcontent) + stream.close() + except (IOError, OSError), err: + sys.exit('ERROR: unable to overwrite current version') + + downloader.to_screen('Updated to version %s' % latest_version) + + if __name__ == '__main__': try: # Modules needed only when running the main program import getpass import optparse - # Function to update the program file with the latest version from the repository. 
- def update_self(downloader, filename): - # Note: downloader only used for options - if not os.access(filename, os.W_OK): - sys.exit('ERROR: no write permissions on %s' % filename) - - downloader.to_screen('Updating to latest stable version...') - try: - latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION' - latest_version = urllib.urlopen(latest_url).read().strip() - prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version - newcontent = urllib.urlopen(prog_url).read() - except (IOError, OSError), err: - sys.exit('ERROR: unable to download latest version') - try: - stream = open(filename, 'w') - stream.write(newcontent) - stream.close() - except (IOError, OSError), err: - sys.exit('ERROR: unable to overwrite current version') - downloader.to_screen('Updated to version %s' % latest_version) # Parse command line parser = optparse.OptionParser( @@ -2981,7 +2986,7 @@ if __name__ == '__main__': # Update version if opts.update_self: - update_self(fd, sys.argv[0]) + updateSelf(fd, sys.argv[0]) # Maybe do nothing if len(all_urls) < 1: From 4f9f96f646cb8df69f77b04bf6af9f4a50aa0b47 Mon Sep 17 00:00:00 2001 From: Georgi Valkov Date: Tue, 23 Aug 2011 15:53:36 +0300 Subject: [PATCH 38/69] Option parsing refactoring ; Moved version string to __version__ Brings terser option formatting to youtube-dl: from: -u USERNAME, --username USERNAME to: -u, --username USERNAME --- youtube-dl | 250 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 142 insertions(+), 108 deletions(-) diff --git a/youtube-dl b/youtube-dl index fe1e6b021..d093e2558 100755 --- a/youtube-dl +++ b/youtube-dl @@ -12,6 +12,7 @@ __author__ = ( ) __license__ = "Public Domain" +__version__ = '2011.08.04' import cookielib import ctypes @@ -2725,116 +2726,149 @@ def updateSelf(downloader, filename): downloader.to_screen('Updated to version %s' % latest_version) +def parseOpts(): + # Deferred imports + import getpass + import optparse + + def _format_option_string(option): + ''' ('-o', '--option') -> -o, --format METAVAR''' + + opts = [] + + if option._short_opts: opts.append(option._short_opts[0]) + if option._long_opts: opts.append(option._long_opts[0]) + if len(opts) > 1: opts.insert(1, ', ') + + if option.takes_value(): opts.append(' %s' % option.metavar) + + return "".join(opts) + + fmt = optparse.IndentedHelpFormatter() + fmt.format_option_strings = _format_option_string + + kw = { + 'version' : __version__, + 'formatter' : fmt, + 'usage' : 'Usage : %prog [options] url...', + 'conflict_handler' : 'resolve', + } + + parser = optparse.OptionParser(**kw) + + # option groups + general = optparse.OptionGroup(parser, 'General Options') + authentication = optparse.OptionGroup(parser, 'Authentication Options') + video_format = optparse.OptionGroup(parser, 'Video Format Options') + postproc = optparse.OptionGroup(parser, 'Post-processing Options') + filesystem = optparse.OptionGroup(parser, 'Filesystem Options') + verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') + + general.add_option('-h', '--help', + action='help', help='print this help text and exit') + general.add_option('-v', '--version', + action='version', help='print program version and exit') + general.add_option('-U', '--update', + action='store_true', dest='update_self', help='update this program to latest stable version') + general.add_option('-i', '--ignore-errors', + action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) + general.add_option('-r', 
'--rate-limit', + dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)') + general.add_option('-R', '--retries', + dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) + general.add_option('--playlist-start', + dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1) + general.add_option('--playlist-end', + dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1) + general.add_option('--dump-user-agent', + action='store_true', dest='dump_user_agent', + help='display the current browser identification', default=False) + + authentication.add_option('-u', '--username', + dest='username', metavar='USERNAME', help='account username') + authentication.add_option('-p', '--password', + dest='password', metavar='PASSWORD', help='account password') + authentication.add_option('-n', '--netrc', + action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) + + + video_format.add_option('-f', '--format', + action='store', dest='format', metavar='FORMAT', help='video format code') + video_format.add_option('--all-formats', + action='store_const', dest='format', help='download all available video formats', const='-1') + video_format.add_option('--max-quality', + action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') + + + verbosity.add_option('-q', '--quiet', + action='store_true', dest='quiet', help='activates quiet mode', default=False) + verbosity.add_option('-s', '--simulate', + action='store_true', dest='simulate', help='do not download video', default=False) + verbosity.add_option('-g', '--get-url', + action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) + verbosity.add_option('-e', '--get-title', + action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) + verbosity.add_option('--get-thumbnail', + action='store_true', dest='getthumbnail', + help='simulate, quiet but print thumbnail URL', default=False) + verbosity.add_option('--get-description', + action='store_true', dest='getdescription', + help='simulate, quiet but print video description', default=False) + verbosity.add_option('--get-filename', + action='store_true', dest='getfilename', + help='simulate, quiet but print output filename', default=False) + verbosity.add_option('--no-progress', + action='store_true', dest='noprogress', help='do not print progress bar', default=False) + verbosity.add_option('--console-title', + action='store_true', dest='consoletitle', + help='display progress in console titlebar', default=False) + + + filesystem.add_option('-t', '--title', + action='store_true', dest='usetitle', help='use title in file name', default=False) + filesystem.add_option('-l', '--literal', + action='store_true', dest='useliteral', help='use literal title in file name', default=False) + filesystem.add_option('-A', '--auto-number', + action='store_true', dest='autonumber', + help='number downloaded files starting from 00000', default=False) + filesystem.add_option('-o', '--output', + dest='outtmpl', metavar='TEMPLATE', help='output filename template') + filesystem.add_option('-a', '--batch-file', + dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') + filesystem.add_option('-w', '--no-overwrites', + action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) + 
filesystem.add_option('-c', '--continue', + action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False) + filesystem.add_option('--cookies', + dest='cookiefile', metavar='FILE', help='file to dump cookie jar to') + filesystem.add_option('--no-part', + action='store_true', dest='nopart', help='do not use .part files', default=False) + filesystem.add_option('--no-mtime', + action='store_false', dest='updatetime', + help='do not use the Last-modified header to set the file modification time', default=True) + + + postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False, + help='convert video files to audio-only files (requires ffmpeg and ffprobe)') + postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best', + help='"best", "aac" or "mp3"; best by default') + + + parser.add_option_group(general) + parser.add_option_group(filesystem) + parser.add_option_group(verbosity) + parser.add_option_group(video_format) + parser.add_option_group(authentication) + parser.add_option_group(postproc) + + opts, args = parser.parse_args() + + return parser, opts, args + + if __name__ == '__main__': try: - # Modules needed only when running the main program - import getpass - import optparse - - - # Parse command line - parser = optparse.OptionParser( - usage='Usage: %prog [options] url...', - version='2011.08.04', - conflict_handler='resolve', - ) - - parser.add_option('-h', '--help', - action='help', help='print this help text and exit') - parser.add_option('-v', '--version', - action='version', help='print program version and exit') - parser.add_option('-U', '--update', - action='store_true', dest='update_self', help='update this program to latest stable version') - parser.add_option('-i', '--ignore-errors', - action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) - parser.add_option('-r', '--rate-limit', - dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 
50k or 44.6m)') - parser.add_option('-R', '--retries', - dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) - parser.add_option('--playlist-start', - dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1) - parser.add_option('--playlist-end', - dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1) - parser.add_option('--dump-user-agent', - action='store_true', dest='dump_user_agent', - help='display the current browser identification', default=False) - - authentication = optparse.OptionGroup(parser, 'Authentication Options') - authentication.add_option('-u', '--username', - dest='username', metavar='USERNAME', help='account username') - authentication.add_option('-p', '--password', - dest='password', metavar='PASSWORD', help='account password') - authentication.add_option('-n', '--netrc', - action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) - parser.add_option_group(authentication) - - video_format = optparse.OptionGroup(parser, 'Video Format Options') - video_format.add_option('-f', '--format', - action='store', dest='format', metavar='FORMAT', help='video format code') - video_format.add_option('--all-formats', - action='store_const', dest='format', help='download all available video formats', const='-1') - video_format.add_option('--max-quality', - action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') - parser.add_option_group(video_format) - - verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') - verbosity.add_option('-q', '--quiet', - action='store_true', dest='quiet', help='activates quiet mode', default=False) - verbosity.add_option('-s', '--simulate', - action='store_true', dest='simulate', help='do not download video', default=False) - verbosity.add_option('-g', '--get-url', - action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) - verbosity.add_option('-e', '--get-title', - action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) - verbosity.add_option('--get-thumbnail', - action='store_true', dest='getthumbnail', - help='simulate, quiet but print thumbnail URL', default=False) - verbosity.add_option('--get-description', - action='store_true', dest='getdescription', - help='simulate, quiet but print video description', default=False) - verbosity.add_option('--get-filename', - action='store_true', dest='getfilename', - help='simulate, quiet but print output filename', default=False) - verbosity.add_option('--no-progress', - action='store_true', dest='noprogress', help='do not print progress bar', default=False) - verbosity.add_option('--console-title', - action='store_true', dest='consoletitle', - help='display progress in console titlebar', default=False) - parser.add_option_group(verbosity) - - filesystem = optparse.OptionGroup(parser, 'Filesystem Options') - filesystem.add_option('-t', '--title', - action='store_true', dest='usetitle', help='use title in file name', default=False) - filesystem.add_option('-l', '--literal', - action='store_true', dest='useliteral', help='use literal title in file name', default=False) - filesystem.add_option('-A', '--auto-number', - action='store_true', dest='autonumber', - help='number downloaded files starting from 00000', default=False) - filesystem.add_option('-o', '--output', - dest='outtmpl', metavar='TEMPLATE', help='output filename 
template') - filesystem.add_option('-a', '--batch-file', - dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') - filesystem.add_option('-w', '--no-overwrites', - action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) - filesystem.add_option('-c', '--continue', - action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False) - filesystem.add_option('--cookies', - dest='cookiefile', metavar='FILE', help='file to dump cookie jar to') - filesystem.add_option('--no-part', - action='store_true', dest='nopart', help='do not use .part files', default=False) - filesystem.add_option('--no-mtime', - action='store_false', dest='updatetime', - help='do not use the Last-modified header to set the file modification time', default=True) - parser.add_option_group(filesystem) - - postproc = optparse.OptionGroup(parser, 'Post-processing Options') - postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False, - help='convert video files to audio-only files (requires ffmpeg and ffprobe)') - postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best', - help='"best", "aac" or "mp3"; best by default') - parser.add_option_group(postproc) - - (opts, args) = parser.parse_args() + parser, opts, args = parseOpts() # Open appropriate CookieJar if opts.cookiefile is None: From 51c8e53ffe62a126a89a5a5ebbaf360a639c9352 Mon Sep 17 00:00:00 2001 From: Georgi Valkov Date: Tue, 23 Aug 2011 16:42:51 +0300 Subject: [PATCH 39/69] Set help formatter width to terminal width (prevents wrapping) --- youtube-dl | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index d093e2558..ac27afdc3 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2744,7 +2744,14 @@ def parseOpts(): return "".join(opts) - fmt = optparse.IndentedHelpFormatter() + max_width = 80 + max_help_position = 80 + + # No need to wrap help messages if we're on a wide console + columns = os.environ.get('COLUMNS', None) + if columns: max_width = columns + + fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position) fmt.format_option_strings = _format_option_string kw = { From 5adcaa43854a4b6bfd0d5e01304bebc7a846fd3d Mon Sep 17 00:00:00 2001 From: Georgi Valkov Date: Tue, 23 Aug 2011 16:48:08 +0300 Subject: [PATCH 40/69] Refactor main function --- youtube-dl | 344 +++++++++++++++++++++++++++-------------------------- 1 file changed, 173 insertions(+), 171 deletions(-) diff --git a/youtube-dl b/youtube-dl index ac27afdc3..34a60afff 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2872,180 +2872,182 @@ def parseOpts(): return parser, opts, args +def main(): + parser, opts, args = parseOpts() + + # Open appropriate CookieJar + if opts.cookiefile is None: + jar = cookielib.CookieJar() + else: + try: + jar = cookielib.MozillaCookieJar(opts.cookiefile) + if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK): + jar.load() + except (IOError, OSError), err: + sys.exit(u'ERROR: unable to open cookie file') + + # Dump user agent + if opts.dump_user_agent: + print std_headers['User-Agent'] + sys.exit(0) + + # General configuration + cookie_processor = urllib2.HTTPCookieProcessor(jar) + urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler())) + socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) + + # Batch file verification + batchurls = [] + if 
opts.batchfile is not None: + try: + if opts.batchfile == '-': + batchfd = sys.stdin + else: + batchfd = open(opts.batchfile, 'r') + batchurls = batchfd.readlines() + batchurls = [x.strip() for x in batchurls] + batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)] + except IOError: + sys.exit(u'ERROR: batch file could not be read') + all_urls = batchurls + args + + # Conflicting, missing and erroneous options + if opts.usenetrc and (opts.username is not None or opts.password is not None): + parser.error(u'using .netrc conflicts with giving username/password') + if opts.password is not None and opts.username is None: + parser.error(u'account username missing') + if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber): + parser.error(u'using output template conflicts with using title, literal title or auto number') + if opts.usetitle and opts.useliteral: + parser.error(u'using title conflicts with using literal title') + if opts.username is not None and opts.password is None: + opts.password = getpass.getpass(u'Type account password and press return:') + if opts.ratelimit is not None: + numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) + if numeric_limit is None: + parser.error(u'invalid rate limit specified') + opts.ratelimit = numeric_limit + if opts.retries is not None: + try: + opts.retries = long(opts.retries) + except (TypeError, ValueError), err: + parser.error(u'invalid retry count specified') + try: + opts.playliststart = long(opts.playliststart) + if opts.playliststart <= 0: + raise ValueError + except (TypeError, ValueError), err: + parser.error(u'invalid playlist start number specified') + try: + opts.playlistend = long(opts.playlistend) + if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart): + raise ValueError + except (TypeError, ValueError), err: + parser.error(u'invalid playlist end number specified') + if opts.extractaudio: + if opts.audioformat not in ['best', 'aac', 'mp3']: + parser.error(u'invalid audio format specified') + + # Information extractors + youtube_ie = YoutubeIE() + metacafe_ie = MetacafeIE(youtube_ie) + dailymotion_ie = DailymotionIE() + youtube_pl_ie = YoutubePlaylistIE(youtube_ie) + youtube_user_ie = YoutubeUserIE(youtube_ie) + youtube_search_ie = YoutubeSearchIE(youtube_ie) + google_ie = GoogleIE() + google_search_ie = GoogleSearchIE(google_ie) + photobucket_ie = PhotobucketIE() + yahoo_ie = YahooIE() + yahoo_search_ie = YahooSearchIE(yahoo_ie) + deposit_files_ie = DepositFilesIE() + facebook_ie = FacebookIE() + generic_ie = GenericIE() + + # File downloader + fd = FileDownloader({ + 'usenetrc': opts.usenetrc, + 'username': opts.username, + 'password': opts.password, + 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename), + 'forceurl': opts.geturl, + 'forcetitle': opts.gettitle, + 'forcethumbnail': opts.getthumbnail, + 'forcedescription': opts.getdescription, + 'forcefilename': opts.getfilename, + 'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename), + 'format': opts.format, + 'format_limit': opts.format_limit, + 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) + or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s') + or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s') + or (opts.format == '-1' and 
u'%(id)s-%(format)s.%(ext)s') + or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s') + or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s') + or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') + or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') + or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s') + or u'%(id)s.%(ext)s'), + 'ignoreerrors': opts.ignoreerrors, + 'ratelimit': opts.ratelimit, + 'nooverwrites': opts.nooverwrites, + 'retries': opts.retries, + 'continuedl': opts.continue_dl, + 'noprogress': opts.noprogress, + 'playliststart': opts.playliststart, + 'playlistend': opts.playlistend, + 'logtostderr': opts.outtmpl == '-', + 'consoletitle': opts.consoletitle, + 'nopart': opts.nopart, + 'updatetime': opts.updatetime, + }) + fd.add_info_extractor(youtube_search_ie) + fd.add_info_extractor(youtube_pl_ie) + fd.add_info_extractor(youtube_user_ie) + fd.add_info_extractor(metacafe_ie) + fd.add_info_extractor(dailymotion_ie) + fd.add_info_extractor(youtube_ie) + fd.add_info_extractor(google_ie) + fd.add_info_extractor(google_search_ie) + fd.add_info_extractor(photobucket_ie) + fd.add_info_extractor(yahoo_ie) + fd.add_info_extractor(yahoo_search_ie) + fd.add_info_extractor(deposit_files_ie) + fd.add_info_extractor(facebook_ie) + + # This must come last since it's the + # fallback if none of the others work + fd.add_info_extractor(generic_ie) + + # PostProcessors + if opts.extractaudio: + fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat)) + + # Update version + if opts.update_self: + updateSelf(fd, sys.argv[0]) + + # Maybe do nothing + if len(all_urls) < 1: + if not opts.update_self: + parser.error(u'you must provide at least one URL') + else: + sys.exit() + retcode = fd.download(all_urls) + + # Dump cookie jar if requested + if opts.cookiefile is not None: + try: + jar.save() + except (IOError, OSError), err: + sys.exit(u'ERROR: unable to save cookie jar') + + sys.exit(retcode) + if __name__ == '__main__': try: - parser, opts, args = parseOpts() - - # Open appropriate CookieJar - if opts.cookiefile is None: - jar = cookielib.CookieJar() - else: - try: - jar = cookielib.MozillaCookieJar(opts.cookiefile) - if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK): - jar.load() - except (IOError, OSError), err: - sys.exit(u'ERROR: unable to open cookie file') - - # Dump user agent - if opts.dump_user_agent: - print std_headers['User-Agent'] - sys.exit(0) - - # General configuration - cookie_processor = urllib2.HTTPCookieProcessor(jar) - urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler())) - socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) - - # Batch file verification - batchurls = [] - if opts.batchfile is not None: - try: - if opts.batchfile == '-': - batchfd = sys.stdin - else: - batchfd = open(opts.batchfile, 'r') - batchurls = batchfd.readlines() - batchurls = [x.strip() for x in batchurls] - batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)] - except IOError: - sys.exit(u'ERROR: batch file could not be read') - all_urls = batchurls + args - - # Conflicting, missing and erroneous options - if opts.usenetrc and (opts.username is not None or opts.password is not None): - parser.error(u'using .netrc conflicts with giving username/password') - if opts.password is not None and opts.username is None: - parser.error(u'account username missing') - if opts.outtmpl is 
not None and (opts.useliteral or opts.usetitle or opts.autonumber): - parser.error(u'using output template conflicts with using title, literal title or auto number') - if opts.usetitle and opts.useliteral: - parser.error(u'using title conflicts with using literal title') - if opts.username is not None and opts.password is None: - opts.password = getpass.getpass(u'Type account password and press return:') - if opts.ratelimit is not None: - numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) - if numeric_limit is None: - parser.error(u'invalid rate limit specified') - opts.ratelimit = numeric_limit - if opts.retries is not None: - try: - opts.retries = long(opts.retries) - except (TypeError, ValueError), err: - parser.error(u'invalid retry count specified') - try: - opts.playliststart = long(opts.playliststart) - if opts.playliststart <= 0: - raise ValueError - except (TypeError, ValueError), err: - parser.error(u'invalid playlist start number specified') - try: - opts.playlistend = long(opts.playlistend) - if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart): - raise ValueError - except (TypeError, ValueError), err: - parser.error(u'invalid playlist end number specified') - if opts.extractaudio: - if opts.audioformat not in ['best', 'aac', 'mp3']: - parser.error(u'invalid audio format specified') - - # Information extractors - youtube_ie = YoutubeIE() - metacafe_ie = MetacafeIE(youtube_ie) - dailymotion_ie = DailymotionIE() - youtube_pl_ie = YoutubePlaylistIE(youtube_ie) - youtube_user_ie = YoutubeUserIE(youtube_ie) - youtube_search_ie = YoutubeSearchIE(youtube_ie) - google_ie = GoogleIE() - google_search_ie = GoogleSearchIE(google_ie) - photobucket_ie = PhotobucketIE() - yahoo_ie = YahooIE() - yahoo_search_ie = YahooSearchIE(yahoo_ie) - deposit_files_ie = DepositFilesIE() - facebook_ie = FacebookIE() - generic_ie = GenericIE() - - # File downloader - fd = FileDownloader({ - 'usenetrc': opts.usenetrc, - 'username': opts.username, - 'password': opts.password, - 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename), - 'forceurl': opts.geturl, - 'forcetitle': opts.gettitle, - 'forcethumbnail': opts.getthumbnail, - 'forcedescription': opts.getdescription, - 'forcefilename': opts.getfilename, - 'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename), - 'format': opts.format, - 'format_limit': opts.format_limit, - 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) - or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s') - or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s') - or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s') - or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s') - or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s') - or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') - or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') - or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s') - or u'%(id)s.%(ext)s'), - 'ignoreerrors': opts.ignoreerrors, - 'ratelimit': opts.ratelimit, - 'nooverwrites': opts.nooverwrites, - 'retries': opts.retries, - 'continuedl': opts.continue_dl, - 'noprogress': opts.noprogress, - 'playliststart': opts.playliststart, - 'playlistend': opts.playlistend, - 'logtostderr': opts.outtmpl == '-', - 'consoletitle': 
opts.consoletitle, - 'nopart': opts.nopart, - 'updatetime': opts.updatetime, - }) - fd.add_info_extractor(youtube_search_ie) - fd.add_info_extractor(youtube_pl_ie) - fd.add_info_extractor(youtube_user_ie) - fd.add_info_extractor(metacafe_ie) - fd.add_info_extractor(dailymotion_ie) - fd.add_info_extractor(youtube_ie) - fd.add_info_extractor(google_ie) - fd.add_info_extractor(google_search_ie) - fd.add_info_extractor(photobucket_ie) - fd.add_info_extractor(yahoo_ie) - fd.add_info_extractor(yahoo_search_ie) - fd.add_info_extractor(deposit_files_ie) - fd.add_info_extractor(facebook_ie) - - # This must come last since it's the - # fallback if none of the others work - fd.add_info_extractor(generic_ie) - - # PostProcessors - if opts.extractaudio: - fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat)) - - # Update version - if opts.update_self: - updateSelf(fd, sys.argv[0]) - - # Maybe do nothing - if len(all_urls) < 1: - if not opts.update_self: - parser.error(u'you must provide at least one URL') - else: - sys.exit() - retcode = fd.download(all_urls) - - # Dump cookie jar if requested - if opts.cookiefile is not None: - try: - jar.save() - except (IOError, OSError), err: - sys.exit(u'ERROR: unable to save cookie jar') - - sys.exit(retcode) - + main() except DownloadError: sys.exit(1) except SameFileError: From 6a4f0a114d88965c171d0117db68be64b4db9acd Mon Sep 17 00:00:00 2001 From: Georgi Valkov Date: Tue, 23 Aug 2011 17:03:28 +0300 Subject: [PATCH 41/69] Use `stty size` to find terminal width if we're on linux and COLUMNS is not exported --- youtube-dl | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 34a60afff..251254765 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2744,11 +2744,21 @@ def parseOpts(): return "".join(opts) + def _find_term_columns(): + columns = os.environ.get('COLUMNS', None) + if columns: return int(columns) + + if sys.platform.startswith('linux'): + try: columns = os.popen('stty size', 'r').read().split()[1] + except: pass + + if columns: return int(columns) + max_width = 80 max_help_position = 80 # No need to wrap help messages if we're on a wide console - columns = os.environ.get('COLUMNS', None) + columns = _find_term_columns() if columns: max_width = columns fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position) From 4f2a5e06da89913f789463fef919e61f9d8f5be6 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 24 Aug 2011 23:28:30 +0200 Subject: [PATCH 42/69] Use subprocess to call stty size when COLUMNS is not set --- youtube-dl | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube-dl b/youtube-dl index 1ae68b2b5..32bd99e98 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2996,11 +2996,12 @@ def parseOpts(): if columns: return int(columns) - # TODO: Breaks on phihag's system - #if sys.platform.startswith('linux'): - # try: - # return os.popen('stty size', 'r').read().split()[1] - # except: pass + try: + sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out,err = sp.communicate() + return out.split()[1] + except: + pass return None max_width = 80 From eb0387a848d5d349895932dec6d2676adc0ab9c9 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 25 Aug 2011 00:08:59 +0200 Subject: [PATCH 43/69] Fix stty detection --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 32bd99e98..3d43355c7 100755 --- a/youtube-dl +++ 
b/youtube-dl @@ -2999,7 +2999,7 @@ def parseOpts(): try: sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) out,err = sp.communicate() - return out.split()[1] + return int(out.split()[1]) except: pass return None From 4618f3da74c61c79be3187a2818ea1ce6b28bb18 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 25 Aug 2011 00:09:28 +0200 Subject: [PATCH 44/69] Makefile to recreate README --- Makefile | 15 +++++++++ README.md | 99 +++++++++++++++++++++++++------------------------------ 2 files changed, 59 insertions(+), 55 deletions(-) create mode 100644 Makefile diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..3ed7108f8 --- /dev/null +++ b/Makefile @@ -0,0 +1,15 @@ + + +update-readme: + @options=$$(COLUMNS=80 ./youtube-dl --help | sed -e '1,/.*General Options.*/ d' -e 's/^\W\{2\}\(\w\)/### \1/') && \ + header=$$(sed -e '/.*## OPTIONS/,$$ d' README.md) && \ + footer=$$(sed -e '1,/.*## COPYRIGHT/ d' README.md) && \ + echo "$${header}" > README.md && \ + echo -e '\n## OPTIONS' >> README.md && \ + echo "$${options}" >> README.md&& \ + echo -e '\n## COPYRIGHT' >> README.md && \ + echo "$${footer}" >> README.md + + + +.PHONY: update-readme diff --git a/README.md b/README.md index cf38e31fc..d0c255201 100644 --- a/README.md +++ b/README.md @@ -11,66 +11,56 @@ your Unix box, in Windows or in Mac OS X. It is released to the public domain, which means you can modify it, redistribute it or use it however you like. ## OPTIONS - -h, --help print this help text and exit - -v, --version print program version and exit - -U, --update update this program to latest stable version - -i, --ignore-errors continue on download errors - -r LIMIT, --rate-limit=LIMIT - download rate limit (e.g. 50k or 44.6m) - -R RETRIES, --retries=RETRIES - number of retries (default is 10) - --playlist-start=NUMBER - playlist video to start at (default is 1) - --playlist-end=NUMBER - playlist video to end at (default is last) - --dump-user-agent display the current browser identification + -h, --help print this help text and exit + -v, --version print program version and exit + -U, --update update this program to latest stable version + -i, --ignore-errors continue on download errors + -r, --rate-limit LIMIT download rate limit (e.g. 
50k or 44.6m) + -R, --retries RETRIES number of retries (default is 10) + --playlist-start NUMBER playlist video to start at (default is 1) + --playlist-end NUMBER playlist video to end at (default is last) + --dump-user-agent display the current browser identification -### Authentication Options - -u USERNAME, --username=USERNAME - account username - -p PASSWORD, --password=PASSWORD - account password - -n, --netrc use .netrc authentication data +### Filesystem Options: + -t, --title use title in file name + -l, --literal use literal title in file name + -A, --auto-number number downloaded files starting from 00000 + -o, --output TEMPLATE output filename template + -a, --batch-file FILE file containing URLs to download ('-' for stdin) + -w, --no-overwrites do not overwrite files + -c, --continue resume partially downloaded files + --cookies FILE file to dump cookie jar to + --no-part do not use .part files + --no-mtime do not use the Last-modified header to set the file + modification time + --write-description write video description to a .description file + --write-info-json write video metadata to a .info.json file -### Video Format Options - -f FORMAT, --format=FORMAT - video format code - --all-formats download all available video formats - --max-quality=FORMAT - highest quality format to download +### Verbosity / Simulation Options: + -q, --quiet activates quiet mode + -s, --simulate do not download video + -g, --get-url simulate, quiet but print URL + -e, --get-title simulate, quiet but print title + --get-thumbnail simulate, quiet but print thumbnail URL + --get-description simulate, quiet but print video description + --get-filename simulate, quiet but print output filename + --no-progress do not print progress bar + --console-title display progress in console titlebar -### Verbosity / Simulation Options - -q, --quiet activates quiet mode - -s, --simulate do not download video - -g, --get-url simulate, quiet but print URL - -e, --get-title simulate, quiet but print title - --get-thumbnail simulate, quiet but print thumbnail URL - --get-description simulate, quiet but print video description - --get-filename simulate, quiet but print output filename - --no-progress do not print progress bar - --console-title display progress in console titlebar +### Video Format Options: + -f, --format FORMAT video format code + --all-formats download all available video formats + --max-quality FORMAT highest quality format to download -### Filesystem Options - -t, --title use title in file name - -l, --literal use literal title in file name - -A, --auto-number number downloaded files starting from 00000 - -o TEMPLATE, --output=TEMPLATE - output filename template - -a FILE, --batch-file=FILE - file containing URLs to download ('-' for stdin) - -w, --no-overwrites - do not overwrite files - -c, --continue resume partially downloaded files - --cookies=FILE file to dump cookie jar to - --no-part do not use .part files - --no-mtime do not use the Last-modified header to set the file - modification time +### Authentication Options: + -u, --username USERNAME account username + -p, --password PASSWORD account password + -n, --netrc use .netrc authentication data ### Post-processing Options: - --extract-audio convert video files to audio-only files (requires - ffmpeg and ffprobe) - --audio-format=FORMAT - "best", "aac" or "mp3"; best by default + --extract-audio convert video files to audio-only files (requires + ffmpeg and ffprobe) + --audio-format FORMAT "best", "aac" or "mp3"; best by default ## 
COPYRIGHT **youtube-dl**: Copyright © 2006-2011 Ricardo Garcia Gonzalez. The program is @@ -80,4 +70,3 @@ likewise released into the public domain. ## BUGS Bugs should be reported at: - From 5f9f2b739678dc8a02879714c8987dd887040676 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 28 Aug 2011 22:10:03 +0200 Subject: [PATCH 45/69] Update: Write downloaded file without modification (allows hashsums) --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 3d43355c7..11d8ca01c 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2965,7 +2965,7 @@ def updateSelf(downloader, filename): sys.exit('ERROR: unable to download latest version') try: - stream = open(filename, 'w') + stream = open(filename, 'wb') stream.write(newcontent) stream.close() except (IOError, OSError), err: From 6ae796b1eebcb77b1f0c952682f86956a48fe462 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 28 Aug 2011 23:17:18 +0200 Subject: [PATCH 46/69] =?UTF-8?q?Credit=20Rog=C3=A9rio=20Brito=20for=20Vim?= =?UTF-8?q?eo=20support?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- youtube-dl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 218782e0d..ecc4c26bb 100755 --- a/youtube-dl +++ b/youtube-dl @@ -9,10 +9,11 @@ __author__ = ( 'Witold Baryluk', 'Paweł Paprota', 'Gergely Imreh', + 'Rogério Brito', ) __license__ = 'Public Domain' -__version__ = '2011.08.24-phihag' +__version__ = '2011.08.28-phihag' import cookielib import datetime From 5fd5ce083843baaf9ed0ecd902c7d5913110bc2b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 28 Aug 2011 23:17:32 +0200 Subject: [PATCH 47/69] Add default make target --- Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3ed7108f8..21558bb81 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,6 @@ +default: update + +update: update-readme update-readme: @@ -12,4 +15,4 @@ update-readme: -.PHONY: update-readme +.PHONY: default update update-readme From d207e7cf88947df1ded016959f921f0e3e172159 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 28 Aug 2011 23:38:40 +0200 Subject: [PATCH 48/69] Update update mechanism (Closes #4) --- LATEST_VERSION | 1 - youtube-dl | 23 ++++++++++++++--------- 2 files changed, 14 insertions(+), 10 deletions(-) delete mode 100644 LATEST_VERSION diff --git a/LATEST_VERSION b/LATEST_VERSION deleted file mode 100644 index 8a3bb049a..000000000 --- a/LATEST_VERSION +++ /dev/null @@ -1 +0,0 @@ -2011.08.04 diff --git a/youtube-dl b/youtube-dl index 5ffd53bd3..534c7ac87 100755 --- a/youtube-dl +++ b/youtube-dl @@ -15,6 +15,8 @@ __author__ = ( __license__ = 'Public Domain' __version__ = '2011.08.28-phihag' +UPDATE_URL = 'https://raw.github.com/phihag/youtube-dl/master/youtube-dl' + import cookielib import datetime import gzip @@ -3071,24 +3073,27 @@ def updateSelf(downloader, filename): if not os.access(filename, os.W_OK): sys.exit('ERROR: no write permissions on %s' % filename) - downloader.to_screen('Updating to latest stable version...') + downloader.to_screen('Updating to latest version...') try: - latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION' - latest_version = urllib.urlopen(latest_url).read().strip() - prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version - newcontent = urllib.urlopen(prog_url).read() + try: + urlh = urllib.urlopen(UPDATE_URL) + newcontent = urlh.read() + finally: + 
urlh.close()
 	except (IOError, OSError), err:
 		sys.exit('ERROR: unable to download latest version')

 	try:
-		stream = open(filename, 'wb')
-		stream.write(newcontent)
-		stream.close()
+		outf = open(filename, 'wb')
+		try:
+			outf.write(newcontent)
+		finally:
+			outf.close()
 	except (IOError, OSError), err:
 		sys.exit('ERROR: unable to overwrite current version')

-	downloader.to_screen('Updated to version %s' % latest_version)
+	downloader.to_screen('Updated youtube-dl. Restart to use the new version.')

 def parseOpts():
 	# Deferred imports

From eb11aaccbb7350618ba784fce1f5511db1fa5c81 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Sun, 28 Aug 2011 23:44:23 +0200
Subject: [PATCH 49/69] Update bug reporting to this fork, so that vimeo/blip.tv issues are reported at phihag/issues instead of rg3/issues (Closes #5)

---
 README.md  | 2 +-
 youtube-dl | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index d0c255201..8ad8e5ca1 100644
--- a/README.md
+++ b/README.md
@@ -69,4 +69,4 @@ originally written by Daniel Bolton () and is
 likewise released into the public domain.

 ## BUGS
-Bugs should be reported at: 
+Bugs should be reported at: 
diff --git a/youtube-dl b/youtube-dl
index 534c7ac87..ec9802ba1 100755
--- a/youtube-dl
+++ b/youtube-dl
@@ -10,6 +10,7 @@ __author__ = (
 	'Paweł Paprota',
 	'Gergely Imreh',
 	'Rogério Brito',
+	'Philipp Hagemeister',
 	)

 __license__ = 'Public Domain'

From e0e56865a0caf52dcc3c8fa7897c94bc292ab91a Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Wed, 31 Aug 2011 21:28:40 +0200
Subject: [PATCH 50/69] Remove stable from help wording (There will be only one main branch for now)

---
 youtube-dl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube-dl b/youtube-dl
index ec9802ba1..c919c4016 100755
--- a/youtube-dl
+++ b/youtube-dl
@@ -3159,7 +3159,7 @@ def parseOpts():
 	general.add_option('-v', '--version',
 			action='version', help='print program version and exit')
 	general.add_option('-U', '--update',
-			action='store_true', dest='update_self', help='update this program to latest stable version')
+			action='store_true', dest='update_self', help='update this program to latest version')
 	general.add_option('-i', '--ignore-errors',
 			action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
 	general.add_option('-r', '--rate-limit',

From dbddab27992fa609102d512a7762ae8f5b39c55c Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Sat, 3 Sep 2011 11:32:05 +0200
Subject: [PATCH 51/69] Robust error handling in downloading code

---
 youtube-dl | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/youtube-dl b/youtube-dl
index 68c9bc429..3f45e92f2 100755
--- a/youtube-dl
+++ b/youtube-dl
@@ -930,6 +930,7 @@ class FileDownloader(object):
 			if stream is None:
 				try:
 					(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
+					assert stream is not None
 					filename = self.undo_temp_name(tmpfilename)
 					self.report_destination(filename)
 				except (OSError, IOError), err:
@@ -951,6 +952,9 @@ class FileDownloader(object):
 			# Apply rate limit
 			self.slow_down(start, byte_counter - resume_len)

+		if stream is None:
+			self.trouble(u'\nERROR: Did not get any data blocks')
+			return False
 		stream.close()
 		self.report_finish()
 		if data_len is not None and byte_counter != data_len:

From 447b1d7170219c391bfefa9efb966adb4e4e4d6b Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Sun, 4 Sep 2011 11:41:54 +0200
Subject: [PATCH 52/69] Added FAQ to README

---
 README.md | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/README.md b/README.md
index 8ad8e5ca1..db75487e3 100644
--- a/README.md
+++ b/README.md
@@ -62,6 +62,24 @@ which means you can modify it, redistribute it or use it however you like.
                            ffmpeg and ffprobe)
     --audio-format FORMAT  "best", "aac" or "mp3"; best by default

+## FAQ
+
+### Can you please put the -b option back?
+
+Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the -b option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you're interested in. In that case, simply request it with the -f option and youtube-dl will try to download it.
+
+### I get HTTP error 402 when trying to download a video. What's this?
+
+Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We're [considering to provide a way to let you solve the CAPTCHA](https://github.com/phihag/youtube-dl/issues/8), but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube-dl.
+
+### I have downloaded a video but how can I play it?
+
+Once the video is fully downloaded, use any video player, such as [vlc](http://www.videolan.org) or [mplayer](http://www.mplayerhq.hu/).
+
+### The links provided by youtube-dl -g are not working anymore
+
+The URLs youtube-dl outputs require the downloader to have the correct cookies. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file.
+
 ## COPYRIGHT
 **youtube-dl**: Copyright © 2006-2011 Ricardo Garcia Gonzalez. The program is
 released into the public domain by the copyright holder. This README file was

From 8f88eb1fa70598c9615b7e58ae3f2497627a1925 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Sun, 4 Sep 2011 11:47:58 +0200
Subject: [PATCH 53/69] Update Makefile to new README format

---
 Makefile  | 4 ++--
 README.md | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index 21558bb81..0039f90c9 100644
--- a/Makefile
+++ b/Makefile
@@ -6,11 +6,11 @@ update: update-readme

 update-readme:
 	@options=$$(COLUMNS=80 ./youtube-dl --help | sed -e '1,/.*General Options.*/ d' -e 's/^\W\{2\}\(\w\)/### \1/') && \
 	header=$$(sed -e '/.*## OPTIONS/,$$ d' README.md) && \
-	footer=$$(sed -e '1,/.*## COPYRIGHT/ d' README.md) && \
+	footer=$$(sed -e '1,/.*## FAQ/ d' README.md) && \
 	echo "$${header}" > README.md && \
 	echo -e '\n## OPTIONS' >> README.md && \
 	echo "$${options}" >> README.md&& \
-	echo -e '\n## COPYRIGHT' >> README.md && \
+	echo -e '\n## FAQ' >> README.md && \
 	echo "$${footer}" >> README.md

diff --git a/README.md b/README.md
index db75487e3..66639ad04 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@ which means you can modify it, redistribute it or use it however you like.

 ## OPTIONS
     -h, --help               print this help text and exit
     -v, --version            print program version and exit
-    -U, --update             update this program to latest stable version
+    -U, --update             update this program to latest version
     -i, --ignore-errors      continue on download errors
     -r, --rate-limit LIMIT   download rate limit (e.g.
50k or 44.6m) -R, --retries RETRIES number of retries (default is 10) From c0a10ca8dcbcf345913502f125cf011889118917 Mon Sep 17 00:00:00 2001 From: FND Date: Mon, 5 Sep 2011 09:46:36 +0200 Subject: [PATCH 54/69] fixed PEP8 whitespace issues mostly vertical whitespace and mixed spaces and tabs --- youtube-dl | 88 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 55 insertions(+), 33 deletions(-) diff --git a/youtube-dl b/youtube-dl index e8b19c8d0..36ca6baf9 100755 --- a/youtube-dl +++ b/youtube-dl @@ -47,6 +47,7 @@ std_headers = { simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii') + def preferredencoding(): """Get preferred encoding. @@ -63,6 +64,7 @@ def preferredencoding(): yield pref return yield_preferredencoding().next() + def htmlentity_transform(matchobj): """Transforms an HTML entity to a Unicode character. @@ -89,11 +91,13 @@ def htmlentity_transform(matchobj): # Unknown entity in name, return its literal representation return (u'&%s;' % entity) + def sanitize_title(utitle): """Sanitizes a video title so it could be used as part of a filename.""" utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle) return utitle.replace(unicode(os.sep), u'%') + def sanitize_open(filename, open_mode): """Try to open the given filename, and slightly tweak it if this fails. @@ -120,13 +124,15 @@ def sanitize_open(filename, open_mode): stream = open(filename, open_mode) return (stream, filename) + def timeconvert(timestr): - """Convert RFC 2822 defined time string into system timestamp""" - timestamp = None - timetuple = email.utils.parsedate_tz(timestr) - if timetuple is not None: - timestamp = email.utils.mktime_tz(timetuple) - return timestamp + """Convert RFC 2822 defined time string into system timestamp""" + timestamp = None + timetuple = email.utils.parsedate_tz(timestr) + if timetuple is not None: + timestamp = email.utils.mktime_tz(timetuple) + return timestamp + class DownloadError(Exception): """Download Error exception. @@ -137,6 +143,7 @@ class DownloadError(Exception): """ pass + class SameFileError(Exception): """Same File exception. @@ -145,6 +152,7 @@ class SameFileError(Exception): """ pass + class PostProcessingError(Exception): """Post Processing exception. @@ -153,6 +161,7 @@ class PostProcessingError(Exception): """ pass + class UnavailableVideoError(Exception): """Unavailable Format exception. @@ -161,6 +170,7 @@ class UnavailableVideoError(Exception): """ pass + class ContentTooShortError(Exception): """Content Too Short exception. @@ -176,6 +186,7 @@ class ContentTooShortError(Exception): self.downloaded = downloaded self.expected = expected + class YoutubeDLHandler(urllib2.HTTPHandler): """Handler for HTTP requests and responses. @@ -185,11 +196,11 @@ class YoutubeDLHandler(urllib2.HTTPHandler): a particular request, the original request in the program code only has to include the HTTP header "Youtubedl-No-Compression", which will be removed before making the real request. - + Part of this code was copied from: - http://techknack.net/python-urllib2-handlers/ - + http://techknack.net/python-urllib2-handlers/ + Andrew Rowls, the author of that code, agreed to release it to the public domain. 
""" @@ -200,7 +211,7 @@ class YoutubeDLHandler(urllib2.HTTPHandler): return zlib.decompress(data, -zlib.MAX_WBITS) except zlib.error: return zlib.decompress(data) - + @staticmethod def addinfourl_wrapper(stream, headers, url, code): if hasattr(urllib2.addinfourl, 'getcode'): @@ -208,7 +219,7 @@ class YoutubeDLHandler(urllib2.HTTPHandler): ret = urllib2.addinfourl(stream, headers, url) ret.code = code return ret - + def http_request(self, req): for h in std_headers: if h in req.headers: @@ -234,6 +245,7 @@ class YoutubeDLHandler(urllib2.HTTPHandler): resp.msg = old_resp.msg return resp + class FileDownloader(object): """File Downloader class. @@ -325,7 +337,7 @@ class FileDownloader(object): else: exponent = long(math.log(bytes, 1024.0)) suffix = 'bkMGTPEZY'[exponent] - converted = float(bytes) / float(1024**exponent) + converted = float(bytes) / float(1024 ** exponent) return '%.2f%s' % (converted, suffix) @staticmethod @@ -463,7 +475,7 @@ class FileDownloader(object): os.rename(old_filename, new_filename) except (IOError, OSError), err: self.trouble(u'ERROR: unable to rename file') - + def try_utime(self, filename, last_modified_hdr): """Try to set the last-modified time of the given file.""" if last_modified_hdr is None: @@ -477,7 +489,7 @@ class FileDownloader(object): if filetime is None: return try: - os.utime(filename,(time.time(), filetime)) + os.utime(filename, (time.time(), filetime)) except: pass @@ -680,7 +692,7 @@ class FileDownloader(object): # Request parameters in case of being able to resume if self.params.get('continuedl', False) and resume_len != 0: self.report_resuming_byte(resume_len) - request.add_header('Range','bytes=%d-' % resume_len) + request.add_header('Range', 'bytes=%d-' % resume_len) open_mode = 'ab' count = 0 @@ -706,7 +718,7 @@ class FileDownloader(object): else: # Examine the reported length if (content_length is not None and - (resume_len - 100 < long(content_length) < resume_len + 100)): + (resume_len - 100 < long(content_length) < resume_len + 100)): # The file had already been fully downloaded. # Explanation to the above condition: in issue #175 it was revealed that # YouTube sometimes adds or removes a few bytes from the end of the file, @@ -784,6 +796,7 @@ class FileDownloader(object): return True + class InfoExtractor(object): """Information Extractor class. @@ -855,6 +868,7 @@ class InfoExtractor(object): """Real extraction process. 
Redefine in subclasses.""" pass + class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" @@ -1009,7 +1023,7 @@ class YoutubeIE(InfoExtractor): self.report_video_info_webpage_download(video_id) for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' - % (video_id, el_type)) + % (video_id, el_type)) request = urllib2.Request(video_info_url) try: video_info_webpage = urllib2.urlopen(request).read() @@ -1371,6 +1385,7 @@ class DailymotionIE(InfoExtractor): except UnavailableVideoError: self._downloader.trouble(u'\nERROR: unable to download video') + class GoogleIE(InfoExtractor): """Information extractor for video.google.com.""" @@ -1464,7 +1479,6 @@ class GoogleIE(InfoExtractor): else: # we need something to pass to process_info video_thumbnail = '' - try: # Process video information self._downloader.process_info({ @@ -1664,7 +1678,8 @@ class YahooIE(InfoExtractor): self._downloader.trouble(u'ERROR: unable to extract video description') return video_description = mobj.group(1).decode('utf-8') - if not video_description: video_description = 'No description available.' + if not video_description: + video_description = 'No description available.' # Extract video height and width mobj = re.search(r'', webpage) @@ -1685,8 +1700,8 @@ class YahooIE(InfoExtractor): yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents yv_bitrate = '700' # according to Wikipedia this is hard-coded request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id + - '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height + - '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797') + '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height + + '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797') try: self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() @@ -1779,11 +1794,11 @@ class GenericIE(InfoExtractor): return video_url = urllib.unquote(mobj.group(1)) - video_id = os.path.basename(video_url) + video_id = os.path.basename(video_url) # here's a fun little line of code for you: video_extension = os.path.splitext(video_id)[1][1:] - video_id = os.path.splitext(video_id)[0] + video_id = os.path.splitext(video_id)[0] # it's tempting to parse this further, but you would # have to take into account all the variations like @@ -1856,7 +1871,7 @@ class YoutubeSearchIE(InfoExtractor): prefix, query = query.split(':') prefix = prefix[8:] - query = query.encode('utf-8') + query = query.encode('utf-8') if prefix == '': self._download_n_results(query, 1) return @@ -1870,7 +1885,7 @@ class YoutubeSearchIE(InfoExtractor): self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) return elif n > self._max_youtube_results: - self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n)) + self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n)) n = self._max_youtube_results self._download_n_results(query, n) return @@ -1914,6 +1929,7 @@ class YoutubeSearchIE(InfoExtractor): pagenum = pagenum + 1 + class GoogleSearchIE(InfoExtractor): """Information Extractor for Google Video search queries.""" 
_VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+' @@ -1947,7 +1963,7 @@ class GoogleSearchIE(InfoExtractor): prefix, query = query.split(':') prefix = prefix[8:] - query = query.encode('utf-8') + query = query.encode('utf-8') if prefix == '': self._download_n_results(query, 1) return @@ -1961,7 +1977,7 @@ class GoogleSearchIE(InfoExtractor): self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) return elif n > self._max_google_results: - self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n)) + self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n)) n = self._max_google_results self._download_n_results(query, n) return @@ -2005,6 +2021,7 @@ class GoogleSearchIE(InfoExtractor): pagenum = pagenum + 1 + class YahooSearchIE(InfoExtractor): """Information Extractor for Yahoo! Video search queries.""" _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+' @@ -2038,7 +2055,7 @@ class YahooSearchIE(InfoExtractor): prefix, query = query.split(':') prefix = prefix[8:] - query = query.encode('utf-8') + query = query.encode('utf-8') if prefix == '': self._download_n_results(query, 1) return @@ -2052,7 +2069,7 @@ class YahooSearchIE(InfoExtractor): self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) return elif n > self._max_yahoo_results: - self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n)) + self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n)) n = self._max_yahoo_results self._download_n_results(query, n) return @@ -2096,6 +2113,7 @@ class YahooSearchIE(InfoExtractor): pagenum = pagenum + 1 + class YoutubePlaylistIE(InfoExtractor): """Information Extractor for YouTube playlists.""" @@ -2172,6 +2190,7 @@ class YoutubePlaylistIE(InfoExtractor): self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) return + class YoutubeUserIE(InfoExtractor): """Information Extractor for YouTube users.""" @@ -2193,7 +2212,7 @@ class YoutubeUserIE(InfoExtractor): def report_download_page(self, username, start_index): """Report attempt to download user page.""" self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' % - (username, start_index, start_index + self._GDATA_PAGE_SIZE)) + (username, start_index, start_index + self._GDATA_PAGE_SIZE)) def _real_initialize(self): self._youtube_ie.initialize() @@ -2255,9 +2274,9 @@ class YoutubeUserIE(InfoExtractor): video_ids = video_ids[playliststart:] else: video_ids = video_ids[playliststart:playlistend] - + self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" % - (username, all_ids_count, len(video_ids))) + (username, all_ids_count, len(video_ids))) for video_id in video_ids: self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id) @@ -2342,6 +2361,7 @@ class DepositFilesIE(InfoExtractor): except UnavailableVideoError, err: self._downloader.trouble(u'ERROR: unable to download file') + class FacebookIE(InfoExtractor): """Information Extractor for Facebook""" @@ -2510,7 +2530,7 @@ class FacebookIE(InfoExtractor): # description video_description = 'No description available.' 
if (self._downloader.params.get('forcedescription', False) and - 'description' in video_info): + 'description' in video_info): video_description = video_info['description'] url_map = video_info['video_urls'] @@ -2565,6 +2585,7 @@ class FacebookIE(InfoExtractor): except UnavailableVideoError, err: self._downloader.trouble(u'\nERROR: unable to download video') + class PostProcessor(object): """Post Processor class. @@ -2611,6 +2632,7 @@ class PostProcessor(object): """ return information # by default, do nothing + class FFmpegExtractAudioPP(PostProcessor): def __init__(self, downloader=None, preferredcodec=None): From 50891fece71b05bce92f694451656988b53a7038 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 6 Sep 2011 17:32:22 +0200 Subject: [PATCH 55/69] Use os.makedirs instead of homebrewn pmkdir --- youtube-dl | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/youtube-dl b/youtube-dl index 7ac27b5a0..0b06c69cc 100755 --- a/youtube-dl +++ b/youtube-dl @@ -455,16 +455,6 @@ class FileDownloader(object): self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] self.params = params - @staticmethod - def pmkdir(filename): - """Create directory components in filename. Similar to Unix "mkdir -p".""" - components = filename.split(os.sep) - aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))] - aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator - for dir in aggregate: - if not os.path.exists(dir): - os.mkdir(dir) - @staticmethod def format_bytes(bytes): if bytes is None: @@ -721,7 +711,7 @@ class FileDownloader(object): return try: - self.pmkdir(filename) + os.makedirs(os.path.dirname(filename)) except (OSError, IOError), err: self.trouble(u'ERROR: unable to create directories: %s' % str(err)) return From eb99a7ee5f7bd36fa9dfcbaf0590ecc2854e3e30 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 6 Sep 2011 17:42:45 +0200 Subject: [PATCH 56/69] Bump version to 2011.09.06 --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 0b06c69cc..cc6462cc0 100755 --- a/youtube-dl +++ b/youtube-dl @@ -14,7 +14,7 @@ __author__ = ( ) __license__ = 'Public Domain' -__version__ = '2011.08.28-phihag' +__version__ = '2011.09.06-phihag' UPDATE_URL = 'https://raw.github.com/phihag/youtube-dl/master/youtube-dl' From e5e74ffb97106949c64000e3d4266d0bbf08cc7c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 6 Sep 2011 17:56:05 +0200 Subject: [PATCH 57/69] Fix os.makedirs in Windows --- youtube-dl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index cc6462cc0..153d4132f 100755 --- a/youtube-dl +++ b/youtube-dl @@ -711,7 +711,9 @@ class FileDownloader(object): return try: - os.makedirs(os.path.dirname(filename)) + dn = os.path.dirname(filename) + if dn != '' and not os.path.exists(dn): + os.makedirs(dn) except (OSError, IOError), err: self.trouble(u'ERROR: unable to create directories: %s' % str(err)) return From 9b0a8bc1982a2f10f6e79e9e8fe4a787e4d665e2 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 6 Sep 2011 23:56:32 +0200 Subject: [PATCH 58/69] myvideo.de support --- youtube-dl | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/youtube-dl b/youtube-dl index 153d4132f..f32716f1e 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2954,6 +2954,82 @@ class BlipTVIE(InfoExtractor): self._downloader.trouble(u'\nERROR: 
unable to download video')
+class MyVideoIE(InfoExtractor):
+    """Information Extractor for myvideo.de."""
+
+    _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
+
+    def __init__(self, downloader=None):
+        InfoExtractor.__init__(self, downloader)
+
+    @staticmethod
+    def suitable(url):
+        return (re.match(MyVideoIE._VALID_URL, url) is not None)
+
+    def report_download_webpage(self, video_id):
+        """Report webpage download."""
+        self._downloader.to_screen(u'[myvideo] %s: Downloading webpage' % video_id)
+
+    def report_extraction(self, video_id):
+        """Report information extraction."""
+        self._downloader.to_screen(u'[myvideo] %s: Extracting information' % video_id)
+
+    def _real_initialize(self):
+        return
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
+            return
+
+        video_id = mobj.group(1)
+        simple_title = mobj.group(2).decode('utf-8')
+        # should actually not be necessary
+        simple_title = sanitize_title(simple_title)
+        simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', simple_title)
+
+        # Get video webpage
+        request = urllib2.Request('http://www.myvideo.de/watch/%s' % video_id)
+        try:
+            self.report_download_webpage(video_id)
+            webpage = urllib2.urlopen(request).read()
+        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+            self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+            return
+
+        self.report_extraction(video_id)
+        mobj = re.search(r'',
+                webpage)
+        if mobj is None:
+            self._downloader.trouble(u'ERROR: unable to extract media URL')
+            return
+        video_url = mobj.group(1) + ('/%s.flv' % video_id)
+
+        mobj = re.search('([^<]+)', webpage)
+        if mobj is None:
+            self._downloader.trouble(u'ERROR: unable to extract title')
+            return
+
+        video_title = mobj.group(1)
+        video_title = sanitize_title(video_title)
+
+        try:
+            print(video_url)
+            self._downloader.process_info({
+                'id': video_id,
+                'url': video_url,
+                'uploader': u'NA',
+                'upload_date': u'NA',
+                'title': video_title,
+                'stitle': simple_title,
+                'ext': u'flv',
+                'format': u'NA',
+                'player_url': None,
+            })
+        except UnavailableVideoError:
+            self._downloader.trouble(u'\nERROR: Unable to download video')
+
class PostProcessor(object):
    """Post Processor class.
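(Editorial aside, not part of the patch.) The new extractor follows the same contract as the others in this series: suitable() decides whether a URL belongs to it, and _real_extract() turns the page into a dict handed to the downloader's process_info(). Stripped of the networking and error handling, the flow is roughly the sketch below; the placeholder media URL and the helper name are assumptions for illustration, not code from the patch.

    import re

    MYVIDEO_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'

    def extract_myvideo_info(url):
        """Sketch of the MyVideoIE flow: match the URL, (fetch the page), build the info dict."""
        mobj = re.match(MYVIDEO_VALID_URL, url)
        if mobj is None:
            return None  # suitable() would have rejected this URL
        video_id, simple_title = mobj.group(1), mobj.group(2)
        # The real extractor downloads the watch page here and scrapes the media
        # URL out of it; this sketch substitutes an obviously fake placeholder.
        video_url = 'http://media.example.invalid/%s.flv' % video_id
        return {
            'id': video_id,
            'url': video_url,
            'title': simple_title,
            'stitle': simple_title,
            'ext': 'flv',
        }

    print(extract_myvideo_info('http://www.myvideo.de/watch/12345/some-title'))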
@@ -3369,6 +3445,8 @@ def main(): facebook_ie = FacebookIE() bliptv_ie = BlipTVIE() vimeo_ie = VimeoIE() + myvideo_ie = MyVideoIE() + generic_ie = GenericIE() # File downloader @@ -3425,6 +3503,7 @@ def main(): fd.add_info_extractor(facebook_ie) fd.add_info_extractor(bliptv_ie) fd.add_info_extractor(vimeo_ie) + fd.add_info_extractor(myvideo_ie) # This must come last since it's the # fallback if none of the others work From 6fc5b0bb17f814579c8e3b130a4ff0824333e959 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 6 Sep 2011 23:58:00 +0200 Subject: [PATCH 59/69] =?UTF-8?q?Credit=20S=C3=B6ren=20Schulze=20for=20myv?= =?UTF-8?q?ideo=20support?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- youtube-dl | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube-dl b/youtube-dl index f32716f1e..135e05c0f 100755 --- a/youtube-dl +++ b/youtube-dl @@ -11,6 +11,7 @@ __author__ = ( 'Gergely Imreh', 'Rogério Brito', 'Philipp Hagemeister', + 'Sören Schulze', ) __license__ = 'Public Domain' From cec3a53cbdc84bf83062a1016a7c6bd77393a9ea Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 7 Sep 2011 09:35:22 +0200 Subject: [PATCH 60/69] Do not try to re-encode unicode filenames (Closes #13) --- youtube-dl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube-dl b/youtube-dl index 135e05c0f..2a116042e 100755 --- a/youtube-dl +++ b/youtube-dl @@ -716,7 +716,7 @@ class FileDownloader(object): if dn != '' and not os.path.exists(dn): os.makedirs(dn) except (OSError, IOError), err: - self.trouble(u'ERROR: unable to create directories: %s' % str(err)) + self.trouble(u'ERROR: unable to create directory ' + unicode(err)) return if self.params.get('writedescription', False): @@ -729,7 +729,7 @@ class FileDownloader(object): finally: descfile.close() except (OSError, IOError): - self.trouble(u'ERROR: Cannot write description file: %s' % str(descfn)) + self.trouble(u'ERROR: Cannot write description file ' + descfn) return if self.params.get('writeinfojson', False): @@ -747,7 +747,7 @@ class FileDownloader(object): finally: infof.close() except (OSError, IOError): - self.trouble(u'ERROR: Cannot write metadata to JSON file: %s' % str(infofn)) + self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn) return try: From c8e30044b8180d88ff49a2d1540fd34a81dacfee Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 7 Sep 2011 21:36:06 +0200 Subject: [PATCH 61/69] Rudimentary support for comedycentral (rtmpdump currently broken) --- youtube-dl | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 95 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 2a116042e..23603438d 100755 --- a/youtube-dl +++ b/youtube-dl @@ -63,6 +63,11 @@ try: except ImportError: pass # Handled below +try: + import xml.etree.ElementTree +except ImportError: # Python<2.5 + pass # Not officially supported, but let it slip + std_headers = { 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', @@ -817,7 +822,7 @@ class FileDownloader(object): # Download using rtmpdump. rtmpdump returns exit code 2 when # the connection was interrumpted and resuming appears to be # possible. This is part of rtmpdump's normal usage, AFAIK. 
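    # (Editorial sketch, not part of this patch.) The hunk below only drops rtmpdump's
    # '-q' flag, but the retry logic around it is easy to lose in the diff: rtmpdump is
    # re-invoked with its resume options for as long as it exits with code 1 or 2 and
    # the partial file keeps growing. A rough, self-contained illustration -- the helper
    # name and the exact stop condition are assumptions, not the script's actual code:
    import os
    import subprocess

    def rtmpdump_with_resume(url, tmpfilename, player_url=None):
        args = ['rtmpdump', '-r', url, '-o', tmpfilename]
        if player_url is not None:
            args += ['-W', player_url]  # pass the player SWF, as the code above does
        retval = subprocess.call(args)
        while retval in (1, 2):  # rtmpdump signals an interrupted, resumable download this way
            prevsize = os.path.getsize(tmpfilename)
            retval = subprocess.call(args + ['-e', '-k', '1'])  # resume options used above
            if os.path.getsize(tmpfilename) == prevsize:
                break  # no progress was made; stop instead of looping forever
        return retval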
- basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename] + basic_args = ['rtmpdump'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename] retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]) while retval == 2 or retval == 1: prevsize = os.path.getsize(tmpfilename) @@ -3031,6 +3036,91 @@ class MyVideoIE(InfoExtractor): except UnavailableVideoError: self._downloader.trouble(u'\nERROR: Unable to download video') +class ComedyCentralIE(InfoExtractor): + """Information extractor for blip.tv""" + + _VALID_URL = r'^(?:https?://)?(www\.)?(thedailyshow|colbertnation)\.com/full-episodes/(.*)$' + + @staticmethod + def suitable(url): + return (re.match(ComedyCentralIE._VALID_URL, url) is not None) + + def report_extraction(self, episode_id): + self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id) + + def report_config_download(self, episode_id): + self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id) + + def _simplify_title(self, title): + res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) + res = res.strip(ur'_') + return res + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + epTitle = mobj.group(3) + + req = urllib2.Request(url) + self.report_extraction(epTitle) + try: + html = urllib2.urlopen(req).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err)) + return + + mMovieParams = re.findall('', html) + if len(mMovieParams) == 0: + self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url) + return + ACT_COUNT = 4 + mediaNum = int(mMovieParams[0][1]) - ACT_COUNT + + for actNum in range(ACT_COUNT): + mediaId = mMovieParams[0][0] + str(mediaNum + actNum) + configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' + + urllib.urlencode({'uri': mediaId})) + configReq = urllib2.Request(configUrl) + self.report_config_download(epTitle) + try: + configXml = urllib2.urlopen(configReq).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err)) + return + + cdoc = xml.etree.ElementTree.fromstring(configXml) + turls = [] + for rendition in cdoc.findall('.//rendition'): + finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text) + turls.append(finfo) + + # For now, just pick the highest bitrate + format,video_url = turls[-1] + + self._downloader.increment_downloads() + actTitle = epTitle + '-act' + str(actNum+1) + info = { + 'id': epTitle, + 'url': video_url, + 'uploader': 'NA', + 'upload_date': 'NA', + 'title': actTitle, + 'stitle': self._simplify_title(actTitle), + 'ext': 'mp4', + 'format': format, + 'thumbnail': None, + 'description': 'TODO: Not yet supported', + 'player_url': None + } + + try: + self._downloader.process_info(info) + except UnavailableVideoError, err: + self._downloader.trouble(u'\nERROR: unable to download video') + + class PostProcessor(object): """Post Processor class. 
@@ -3375,7 +3465,8 @@ def main(): # General configuration cookie_processor = urllib2.HTTPCookieProcessor(jar) - urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler())) + opener = urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler()) + urllib2.install_opener(opener) socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) # Batch file verification @@ -3447,6 +3538,7 @@ def main(): bliptv_ie = BlipTVIE() vimeo_ie = VimeoIE() myvideo_ie = MyVideoIE() + comedycentral_ie = ComedyCentralIE() generic_ie = GenericIE() @@ -3505,6 +3597,7 @@ def main(): fd.add_info_extractor(bliptv_ie) fd.add_info_extractor(vimeo_ie) fd.add_info_extractor(myvideo_ie) + fd.add_info_extractor(comedycentral_ie) # This must come last since it's the # fallback if none of the others work From 0f862ea18cdfdc4489c0b1915d52bd2296c1ebc3 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 7 Sep 2011 21:43:19 +0200 Subject: [PATCH 62/69] comedycentral: include player URL (still broken) --- youtube-dl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube-dl b/youtube-dl index 23603438d..71e7aa8d8 100755 --- a/youtube-dl +++ b/youtube-dl @@ -3071,15 +3071,17 @@ class ComedyCentralIE(InfoExtractor): self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err)) return - mMovieParams = re.findall('', html) + mMovieParams = re.findall('', html) if len(mMovieParams) == 0: self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url) return ACT_COUNT = 4 - mediaNum = int(mMovieParams[0][1]) - ACT_COUNT + player_url = mMovieParams[0][0] + mediaNum = int(mMovieParams[0][2]) - ACT_COUNT + movieId = mMovieParams[0][1] for actNum in range(ACT_COUNT): - mediaId = mMovieParams[0][0] + str(mediaNum + actNum) + mediaId = movieId + str(mediaNum + actNum) configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' 
+ urllib.urlencode({'uri': mediaId})) configReq = urllib2.Request(configUrl) @@ -3112,7 +3114,7 @@ class ComedyCentralIE(InfoExtractor): 'format': format, 'thumbnail': None, 'description': 'TODO: Not yet supported', - 'player_url': None + 'player_url': player_url } try: From fedf9f390210d0a06f323f0476681b607ee57b0f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 7 Sep 2011 22:06:09 +0200 Subject: [PATCH 63/69] Basic comedycentral (The Daily Show) support (Will work as soon as rtmpdump gets fixed) --- youtube-dl | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 71e7aa8d8..a1245a8b4 100755 --- a/youtube-dl +++ b/youtube-dl @@ -3051,6 +3051,9 @@ class ComedyCentralIE(InfoExtractor): def report_config_download(self, episode_id): self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id) + def report_player_url(self, episode_id): + self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id) + def _simplify_title(self, title): res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) res = res.strip(ur'_') @@ -3076,10 +3079,19 @@ class ComedyCentralIE(InfoExtractor): self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url) return ACT_COUNT = 4 - player_url = mMovieParams[0][0] + first_player_url = mMovieParams[0][0] mediaNum = int(mMovieParams[0][2]) - ACT_COUNT movieId = mMovieParams[0][1] + playerReq = urllib2.Request(first_player_url) + self.report_player_url(epTitle) + try: + playerResponse = urllib2.urlopen(playerReq) + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download player: %s' % unicode(err)) + return + player_url = playerResponse.geturl() + for actNum in range(ACT_COUNT): mediaId = movieId + str(mediaNum + actNum) configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' 
+ @@ -3102,7 +3114,7 @@ class ComedyCentralIE(InfoExtractor): format,video_url = turls[-1] self._downloader.increment_downloads() - actTitle = epTitle + '-act' + str(actNum+1) + actTitle = 'act' + str(actNum+1) info = { 'id': epTitle, 'url': video_url, From 46c8c43266ebeb6013c1424cd7ec5a43ee57fef0 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 7 Sep 2011 22:42:33 +0200 Subject: [PATCH 64/69] Switch around act and episode title (makes -t nicer) --- youtube-dl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube-dl b/youtube-dl index a1245a8b4..f3472f253 100755 --- a/youtube-dl +++ b/youtube-dl @@ -3103,7 +3103,7 @@ class ComedyCentralIE(InfoExtractor): except (urllib2.URLError, httplib.HTTPException, socket.error), err: self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err)) return - + cdoc = xml.etree.ElementTree.fromstring(configXml) turls = [] for rendition in cdoc.findall('.//rendition'): @@ -3116,19 +3116,19 @@ class ComedyCentralIE(InfoExtractor): self._downloader.increment_downloads() actTitle = 'act' + str(actNum+1) info = { - 'id': epTitle, + 'id': actTitle, 'url': video_url, 'uploader': 'NA', 'upload_date': 'NA', - 'title': actTitle, - 'stitle': self._simplify_title(actTitle), + 'title': epTitle, + 'stitle': self._simplify_title(epTitle), 'ext': 'mp4', 'format': format, 'thumbnail': None, 'description': 'TODO: Not yet supported', 'player_url': player_url } - + try: self._downloader.process_info(info) except UnavailableVideoError, err: From a88bc6bbd388efba2e7c6534a185cf57bebfe046 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 7 Sep 2011 23:15:26 +0200 Subject: [PATCH 65/69] Temporarily fix dailyshow+colbertnation media IDs --- youtube-dl | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/youtube-dl b/youtube-dl index f3472f253..651e9d3a1 100755 --- a/youtube-dl +++ b/youtube-dl @@ -3074,13 +3074,22 @@ class ComedyCentralIE(InfoExtractor): self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err)) return - mMovieParams = re.findall('', html) + mMovieParams = re.findall('', html) if len(mMovieParams) == 0: self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url) return - ACT_COUNT = 4 + show_id = mMovieParams[0][2] + ACT_COUNT = { # TODO: Detect this dynamically + 'thedailyshow.com': 4, + 'colbertnation.com': 3, + }.get(show_id, 4) + OFFSET = { + 'thedailyshow.com': -ACT_COUNT, + 'colbertnation.com': 1, + }.get(show_id, -ACT_COUNT) + first_player_url = mMovieParams[0][0] - mediaNum = int(mMovieParams[0][2]) - ACT_COUNT + mediaNum = int(mMovieParams[0][3]) + OFFSET movieId = mMovieParams[0][1] playerReq = urllib2.Request(first_player_url) @@ -3093,6 +3102,7 @@ class ComedyCentralIE(InfoExtractor): player_url = playerResponse.geturl() for actNum in range(ACT_COUNT): + actTitle = 'act' + str(actNum+1) mediaId = movieId + str(mediaNum + actNum) configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' 
+ urllib.urlencode({'uri': mediaId})) @@ -3110,18 +3120,23 @@ class ComedyCentralIE(InfoExtractor): finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text) turls.append(finfo) + if len(turls) == 0: + self._downloader.trouble(u'\nERROR: unable to download ' + actTitle + ': No videos found') + continue + # For now, just pick the highest bitrate format,video_url = turls[-1] self._downloader.increment_downloads() - actTitle = 'act' + str(actNum+1) + + effTitle = show_id.replace('.com', '') + '-' + epTitle info = { 'id': actTitle, 'url': video_url, - 'uploader': 'NA', + 'uploader': show_id, 'upload_date': 'NA', - 'title': epTitle, - 'stitle': self._simplify_title(epTitle), + 'title': effTitle, + 'stitle': self._simplify_title(effTitle), 'ext': 'mp4', 'format': format, 'thumbnail': None, @@ -3132,7 +3147,8 @@ class ComedyCentralIE(InfoExtractor): try: self._downloader.process_info(info) except UnavailableVideoError, err: - self._downloader.trouble(u'\nERROR: unable to download video') + self._downloader.trouble(u'\nERROR: unable to download ' + actTitle) + continue class PostProcessor(object): From 5991ddfd7adf7a45f8637b313bad881b646891ea Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 8 Sep 2011 18:49:28 +0200 Subject: [PATCH 66/69] comedycentral: Use media number instead of act number as ID --- youtube-dl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube-dl b/youtube-dl index 651e9d3a1..017a4610c 100755 --- a/youtube-dl +++ b/youtube-dl @@ -3089,7 +3089,7 @@ class ComedyCentralIE(InfoExtractor): }.get(show_id, -ACT_COUNT) first_player_url = mMovieParams[0][0] - mediaNum = int(mMovieParams[0][3]) + OFFSET + startMediaNum = int(mMovieParams[0][3]) + OFFSET movieId = mMovieParams[0][1] playerReq = urllib2.Request(first_player_url) @@ -3102,8 +3102,8 @@ class ComedyCentralIE(InfoExtractor): player_url = playerResponse.geturl() for actNum in range(ACT_COUNT): - actTitle = 'act' + str(actNum+1) - mediaId = movieId + str(mediaNum + actNum) + mediaNum = startMediaNum + actNum + mediaId = movieId + str(mediaNum) configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' 
+ urllib.urlencode({'uri': mediaId}))
            configReq = urllib2.Request(configUrl)
@@ -3121,7 +3121,7 @@ class ComedyCentralIE(InfoExtractor):
                turls.append(finfo)
            if len(turls) == 0:
-                self._downloader.trouble(u'\nERROR: unable to download ' + actTitle + ': No videos found')
+                self._downloader.trouble(u'\nERROR: unable to download ' + str(mediaNum) + ': No videos found')
                continue
            # For now, just pick the highest bitrate
@@ -3131,7 +3131,7 @@ class ComedyCentralIE(InfoExtractor):
            effTitle = show_id.replace('.com', '') + '-' + epTitle
            info = {
-                'id': actTitle,
+                'id': str(mediaNum),
                'url': video_url,
                'uploader': show_id,
                'upload_date': 'NA',
@@ -3147,7 +3147,7 @@ class ComedyCentralIE(InfoExtractor):
            try:
                self._downloader.process_info(info)
            except UnavailableVideoError, err:
-                self._downloader.trouble(u'\nERROR: unable to download ' + actTitle)
+                self._downloader.trouble(u'\nERROR: unable to download ' + str(mediaNum))
                continue
From d793aebaed09f9ec19e5b7d07e8e3063545e9a72 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Fri, 9 Sep 2011 08:14:01 +0200
Subject: [PATCH 67/69] comedycentral: 1 seems to be the correct constant offset

---
 youtube-dl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/youtube-dl b/youtube-dl
index 017a4610c..1b2ccae2f 100755
--- a/youtube-dl
+++ b/youtube-dl
@@ -15,7 +15,7 @@ __author__ = (
    )
__license__ = 'Public Domain'
-__version__ = '2011.09.06-phihag'
+__version__ = '2011.09.09-phihag'
UPDATE_URL = 'https://raw.github.com/phihag/youtube-dl/master/youtube-dl'
@@ -3084,9 +3084,9 @@ class ComedyCentralIE(InfoExtractor):
            'colbertnation.com': 3,
        }.get(show_id, 4)
        OFFSET = {
-            'thedailyshow.com': -ACT_COUNT,
+            'thedailyshow.com': 1,
            'colbertnation.com': 1,
-        }.get(show_id, -ACT_COUNT)
+        }.get(show_id, 1)
        first_player_url = mMovieParams[0][0]
        startMediaNum = int(mMovieParams[0][3]) + OFFSET
From 454d6691d8f38b24bd0bcac9ea77993f9a8f5852 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Fri, 9 Sep 2011 08:41:52 +0200
Subject: [PATCH 68/69] Include ERROR: no fmt_url_map or conn information found in video info in FAQ

---
 README.md | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 66639ad04..af03fc0a3 100644
--- a/README.md
+++ b/README.md
@@ -80,6 +84,10 @@ Once the video is fully downloaded, use any video player, such as [vlc](http://w
The URLs youtube-dl outputs require the downloader to have the correct cookies. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file.
+### ERROR: no fmt_url_map or conn information found in video info
+
+YouTube switched to a new video info format in July 2011, which old versions of youtube-dl do not support. You can update youtube-dl with `sudo youtube-dl -U`.
+
## COPYRIGHT
**youtube-dl**: Copyright © 2006-2011 Ricardo Garcia Gonzalez. The program is released into the public domain by the copyright holder. This README file was
@@ -87,4 +91,4 @@ originally written by Daniel Bolton () and is likewise released into the public domain.
## BUGS -Bugs should be reported at: +Bugs and suggestions should be reported at: From e133e1213f3242bec6c3139f43c686bc84d71bfa Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 9 Sep 2011 08:47:00 +0200 Subject: [PATCH 69/69] README: More bug filing instructions --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index af03fc0a3..f292e85d7 100644 --- a/README.md +++ b/README.md @@ -91,4 +91,12 @@ originally written by Daniel Bolton () and is likewise released into the public domain. ## BUGS + Bugs and suggestions should be reported at: + +Please include: + +* Your exact command line, like `youtube-dl -t "http://www.youtube.com/watch?v=uHlDtZ6Oc3s&feature=channel_video_title"`. A common mistake is not to escape the `&`. Putting URLs in quotes should solve this problem. +* The output of `youtube-dl --version` +* The output of `python --version` +* The name and version of your Operating System ("Ubuntu 11.04 x64" or "Windows 7 x64" is usually enough).
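Closing editorial note (not part of any patch above): the ComedyCentral handling that patches 61 through 67 converge on boils down to a per-show act count, a constant media-number offset of 1, and one mediaGenEntertainment.jhtml request per act. The sketch below summarizes that arithmetic; the show table and the URL mirror the diffs, while the movie ID and start parameter are made-up placeholders rather than values the extractor would really see.

    try:
        from urllib import urlencode  # Python 2, as used by the script itself
    except ImportError:
        from urllib.parse import urlencode  # Python 3 fallback, for this sketch only

    ACT_COUNT = {  # acts per full episode, per show (patch 65; detected statically for now)
        'thedailyshow.com': 4,
        'colbertnation.com': 3,
    }
    OFFSET = 1  # patch 67: the first act's media number is always the page parameter plus one

    def act_config_urls(show_id, movie_id, start_param):
        """Yield (media_id, config_url) for each act of an episode (illustration only)."""
        start_media_num = int(start_param) + OFFSET
        for act_num in range(ACT_COUNT.get(show_id, 4)):
            media_id = movie_id + str(start_media_num + act_num)
            config_url = ('http://www.comedycentral.com/global/feeds/entertainment/media/'
                          'mediaGenEntertainment.jhtml?' + urlencode({'uri': media_id}))
            yield media_id, config_url

    # Placeholder inputs: the real movie ID and start number are scraped from the
    # episode page's flash parameters, not hard-coded like this.
    for media_id, config_url in act_config_urls('colbertnation.com', 'example-movie-id-', '100'):
        print(media_id + ' -> ' + config_url)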