This commit is contained in:
GitHub Merge Button 2012-02-25 16:01:11 -08:00
commit 60ebf369a2

View file

@@ -1593,7 +1593,7 @@ class MetacafeIE(InfoExtractor):
class DailymotionIE(InfoExtractor):
"""Information Extractor for Dailymotion"""
_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/(.+)'
IE_NAME = u'dailymotion'
def __init__(self, downloader=None):
@@ -1608,7 +1608,7 @@ class DailymotionIE(InfoExtractor):
self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
def _real_extract(self, url):
# Extract id and simplified title from URL
# Extract id
mobj = re.match(self._VALID_URL, url)
if mobj is None:
self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
@@ -2170,27 +2170,15 @@ class GenericIE(InfoExtractor):
self.report_extraction(video_id)
# Start with something easy: JW Player in SWFObject
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if mobj is None:
matches = [mobj for mobj in re.finditer(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)]
if len(matches) == 0:
# Broaden the search a little bit
mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
if mobj is None:
matches = [mobj for mobj in re.finditer(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)]
if len(matches) == 0:
self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
return
# It's possible that one of the regexes
# matched, but returned an empty group:
if mobj.group(1) is None:
self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
return
video_url = urllib.unquote(mobj.group(1))
video_id = os.path.basename(video_url)
# here's a fun little line of code for you:
video_extension = os.path.splitext(video_id)[1][1:]
video_id = os.path.splitext(video_id)[0]
# it's tempting to parse this further, but you would
# have to take into account all the variations like
# Video Title - Site Name
@@ -2212,6 +2200,20 @@ class GenericIE(InfoExtractor):
return
video_uploader = mobj.group(1).decode('utf-8')
for mobj in matches:
# It's possible that one of the regexes
# matched, but returned an empty group:
if mobj.group(1) is None:
self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
continue
video_url = urllib.unquote(mobj.group(1))
video_id = os.path.basename(video_url)
# here's a fun little line of code for you:
video_extension = os.path.splitext(video_id)[1][1:]
video_id = os.path.splitext(video_id)[0]
try:
# Process video information
self._downloader.process_info({