Merge e516d7f2cd into 77bd7968ea

2025-08-22 22:34:01 -07:00 · 2012-12-19 07:44:55 -08:00 · 2012-12-19 07:44:55 -08:00 · a2d475958d
commit a2d475958d
parent 77bd7968ea e516d7f2cd
4 changed files with 45 additions and 39 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
 *.pyc
 *.pyo
 *~
+*.DS_Store
 wine-py2exe/
 py2exe.log
 *.kate-swp
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -454,7 +454,7 @@ class FileDownloader(object):
                self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
                return
            try:
-                with io.open(encodeFilename(infofn), 'w', 'utf-8') as infof:
+                with io.open(encodeFilename(infofn), 'wb') as infof:
                    json_info_dict = dict((k, v) for k,v in info_dict.items() if not k in ['urlhandle'])
                    json.dump(json_info_dict, infof)
            except (OSError, IOError):
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -999,7 +999,7 @@ class VimeoIE(InfoExtractor):
        video_thumbnail = config["video"]["thumbnail"]

        # Extract video description
-        video_description = get_element_by_id("description", webpage)
+        video_description = get_element_by_attribute("itemprop", "description", webpage)
        if video_description: video_description = clean_html(video_description)
        else: video_description = ''

--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -201,10 +201,11 @@ def htmlentity_transform(matchobj):
    return (u'&%s;' % entity)

 compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
-class IDParser(compat_html_parser.HTMLParser):
-    """Modified HTMLParser that isolates a tag with the specified id"""
-    def __init__(self, id):
-        self.id = id
+class AttrParser(compat_html_parser.HTMLParser):
+    """Modified HTMLParser that isolates a tag with the specified attribute"""
+    def __init__(self, attribute, value):
+        self.attribute = attribute
+        self.value = value
        self.result = None
        self.started = False
        self.depth = {}
@@ -229,7 +230,7 @@ class IDParser(compat_html_parser.HTMLParser):
        attrs = dict(attrs)
        if self.started:
            self.find_startpos(None)
-        if 'id' in attrs and attrs['id'] == self.id:
+        if self.attribute in attrs and attrs[self.attribute] == self.value:
            self.result = [tag]
            self.started = True
            self.watch_startpos = True
@@ -267,8 +268,12 @@ class IDParser(compat_html_parser.HTMLParser):
        return '\n'.join(lines).strip()

 def get_element_by_id(id, html):
-    """Return the content of the tag with the specified id in the passed HTML document"""
-    parser = IDParser(id)
+    """Return the content of the tag with the specified ID in the passed HTML document"""
+    return get_element_by_attribute("id", id, html)
+
+def get_element_by_attribute(attribute, value, html):
+    """Return the content of the tag with the specified attribute in the passed HTML document"""
+    parser = AttrParser(attribute, value)
    try:
        parser.loads(html)
    except compat_html_parser.HTMLParseError: