Merge 20b2a55968 into 99d46e8c27

2025-08-22 06:14:05 -07:00 · 2012-02-25 16:01:11 -08:00 · 2012-02-25 16:01:11 -08:00 · 60ebf369a2
commit 60ebf369a2
parent 99d46e8c27 20b2a55968
1 changed file with 36 additions and 34 deletions
--- a/40
+++ b/40
@@ -1593,7 +1593,7 @@ class MetacafeIE(InfoExtractor):
 class DailymotionIE(InfoExtractor):
 	"""Information Extractor for Dailymotion"""

-	_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
+	_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/(.+)'
 	IE_NAME = u'dailymotion'

 	def __init__(self, downloader=None):
@@ -1608,7 +1608,7 @@ class DailymotionIE(InfoExtractor):
 		self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)

 	def _real_extract(self, url):
-		# Extract id and simplified title from URL
+		# Extract id
 		mobj = re.match(self._VALID_URL, url)
 		if mobj is None:
 			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
@@ -2170,27 +2170,15 @@ class GenericIE(InfoExtractor):

 		self.report_extraction(video_id)
 		# Start with something easy: JW Player in SWFObject
-		mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
-		if mobj is None:
+		matches = [mobj for mobj in re.finditer(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)]
+
+		if len(matches) == 0:
 			# Broaden the search a little bit
-			mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
-		if mobj is None:
+			matches = [mobj for mobj in re.finditer(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)]
+		if len(matches) == 0:
 			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
 			return

-		# It's possible that one of the regexes
-		# matched, but returned an empty group:
-		if mobj.group(1) is None:
-			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
-			return
-
-		video_url = urllib.unquote(mobj.group(1))
-		video_id = os.path.basename(video_url)
-
-		# here's a fun little line of code for you:
-		video_extension = os.path.splitext(video_id)[1][1:]
-		video_id = os.path.splitext(video_id)[0]
-
 		# it's tempting to parse this further, but you would
 		# have to take into account all the variations like
 		#   Video Title - Site Name
@@ -2212,6 +2200,20 @@ class GenericIE(InfoExtractor):
 			return
 		video_uploader = mobj.group(1).decode('utf-8')

+		for mobj in matches:
+			# It's possible that one of the regexes
+			# matched, but returned an empty group:
+			if mobj.group(1) is None:
+				self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
+				continue
+
+			video_url = urllib.unquote(mobj.group(1))
+			video_id = os.path.basename(video_url)
+
+			# here's a fun little line of code for you:
+			video_extension = os.path.splitext(video_id)[1][1:]
+			video_id = os.path.splitext(video_id)[0]
+
 			try:
 				# Process video information
 				self._downloader.process_info({