This commit is contained in:
Sepero 2012-11-25 18:54:22 -08:00
commit b97c8d18a1
4 changed files with 91 additions and 106 deletions

View file

@ -19,7 +19,8 @@ which means you can modify it, redistribute it or use it however you like.
-U, --update update this program to latest version
-i, --ignore-errors continue on download errors
-r, --rate-limit LIMIT download rate limit (e.g. 50k or 44.6m)
-R, --retries RETRIES number of retries (default is 10)
-R, --retries RETRIES number of retries (default is 10). Specify 0 or inf
for infinite retries
--dump-user-agent display the current browser identification
--user-agent UA specify a custom user agent
--list-extractors List all supported extractors and the URLs they

View file

@ -292,7 +292,7 @@ class FileDownloader(object):
def report_retry(self, count, retries):
    """Report retry in case of HTTP error 5xx.

    count is the number of the retry attempt being started.  retries is
    the retry limit; the caller passes the string "infinite" instead of a
    number when retries are unlimited, so both values are formatted with
    %s rather than %d.
    """
    self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %s of %s)...' % (count, retries))
def report_file_already_downloaded(self, file_name):
"""Report file has already been fully downloaded."""
@ -584,8 +584,8 @@ class FileDownloader(object):
resume_len = 0
count = 0
retries = self.params.get('retries', 0)
while count <= retries:
retries = self.params['retries']
while retries == 0 or count < retries:
# Establish connection
try:
if count == 0 and 'urlhandle' in info_dict:
@ -626,10 +626,9 @@ class FileDownloader(object):
break
# Retry
count += 1
if count <= retries:
self.report_retry(count, retries)
self.report_retry(count, retries if retries else "infinite")
if count > retries:
if retries != 0 and count > retries:
self.trouble(u'ERROR: giving up after %s retries' % retries)
return False

View file

@ -93,6 +93,66 @@ class InfoExtractor(object):
"""Real extraction process. Redefine in subclasses."""
pass
def _login(self):
if self._downloader is None:
return False
username = None
password = None
downloader_params = self._downloader.params
# Attempt to use provided username and password or .netrc data
if downloader_params.get('username', None) and \
downloader_params.get('password', None):
username = downloader_params['username']
password = downloader_params['password']
elif downloader_params.get('usenetrc', False):
try:
info = netrc.netrc().authenticators(self._NETRC_MACHINE)
if info is not None:
username = info[0]
password = info[2]
else:
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
except (IOError, netrc.NetrcParseError), err:
self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
return False
# Set language
if hasattr(self, "_LANG_URL"):
request = urllib2.Request(self._LANG_URL)
try:
self.report_lang()
urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
return False
# No authentication to be performed
if username is None:
return False
login_form = self._LOGIN_FORM
# Set login credentials
for k in login_form:
if login_form[k] == "username":
login_form[k] = username
elif login_form[k] == "password":
login_form[k] = password
request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
try:
self.report_login()
login_results = urllib2.urlopen(request).read()
if re.search(self._FAILED_LOGIN, login_results) is not None:
self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
return
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
return False
return request
class YoutubeIE(InfoExtractor):
"""Information extractor for youtube.com."""
@ -119,9 +179,17 @@ class YoutubeIE(InfoExtractor):
$"""
_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
_FAILED_LOGIN = r'(?i)<form[^>]* name="loginForm"'
_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
_NETRC_MACHINE = 'youtube'
_LOGIN_FORM = {
'current_form': 'loginForm',
'next': '/',
'action_login': 'Log In',
'username': "username",
'password': "password",
}
# Listed in order of quality
_available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13']
_available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13']
@ -218,59 +286,10 @@ class YoutubeIE(InfoExtractor):
print '%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???'))
def _real_initialize(self):
if self._downloader is None:
return
username = None
password = None
downloader_params = self._downloader.params
# Attempt to use provided username and password or .netrc data
if downloader_params.get('username', None) is not None:
username = downloader_params['username']
password = downloader_params['password']
elif downloader_params.get('usenetrc', False):
try:
info = netrc.netrc().authenticators(self._NETRC_MACHINE)
if info is not None:
username = info[0]
password = info[2]
else:
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
except (IOError, netrc.NetrcParseError), err:
self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
return
# Set language
request = urllib2.Request(self._LANG_URL)
try:
self.report_lang()
urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
return
# No authentication to be performed
if username is None:
return
# Log in
login_form = {
'current_form': 'loginForm',
'next': '/',
'action_login': 'Log In',
'username': username,
'password': password,
}
request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
try:
self.report_login()
login_results = urllib2.urlopen(request).read()
if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
return
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
request = self._login()
if not request:
return
# Confirm age
@ -1898,7 +1917,13 @@ class FacebookIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
_LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
_FAILED_LOGIN = r'<form(.*)name="login"(.*)</form>'
_NETRC_MACHINE = 'facebook'
_LOGIN_FORM = {
'email': "username",
'pass': "password",
'login': 'Log+In'
}
_available_formats = ['video', 'highqual', 'lowqual']
_video_extensions = {
'video': 'mp4',
@ -1953,48 +1978,8 @@ class FacebookIE(InfoExtractor):
return video_info
def _real_initialize(self):
if self._downloader is None:
return
useremail = None
password = None
downloader_params = self._downloader.params
# Attempt to use provided username and password or .netrc data
if downloader_params.get('username', None) is not None:
useremail = downloader_params['username']
password = downloader_params['password']
elif downloader_params.get('usenetrc', False):
try:
info = netrc.netrc().authenticators(self._NETRC_MACHINE)
if info is not None:
useremail = info[0]
password = info[2]
else:
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
except (IOError, netrc.NetrcParseError), err:
self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
return
if useremail is None:
return
# Log in
login_form = {
'email': useremail,
'pass': password,
'login': 'Log+In'
}
request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
try:
self.report_login()
login_results = urllib2.urlopen(request).read()
if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
return
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
return
self._login()
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View file

@ -188,7 +188,7 @@ def parseOpts():
general.add_option('-r', '--rate-limit',
dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
general.add_option('-R', '--retries',
dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10)
dest='retries', metavar='RETRIES', help='number of retries (default is %default). specify 0 or inf for infinite retries', default=10)
general.add_option('--dump-user-agent',
action='store_true', dest='dump_user_agent',
help='display the current browser identification', default=False)
@ -437,11 +437,11 @@ def _real_main():
if numeric_limit is None:
parser.error(u'invalid rate limit specified')
opts.ratelimit = numeric_limit
if opts.retries is not None:
try:
opts.retries = long(opts.retries)
except (TypeError, ValueError), err:
parser.error(u'invalid retry count specified')
try:
if opts.retries = "inf": opts.retries = 0
opts.retries = long(opts.retries)
except (TypeError, ValueError), err:
parser.error(u'invalid retry count specified')
try:
opts.playliststart = int(opts.playliststart)
if opts.playliststart <= 0: