This commit is contained in:
nickdaniels 2012-12-19 07:44:55 -08:00
commit a2d475958d
4 changed files with 45 additions and 39 deletions

1
.gitignore vendored
View file

@@ -1,6 +1,7 @@
*.pyc *.pyc
*.pyo *.pyo
*~ *~
*.DS_Store
wine-py2exe/ wine-py2exe/
py2exe.log py2exe.log
*.kate-swp *.kate-swp

View file

@@ -454,7 +454,7 @@ class FileDownloader(object):
self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.') self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
return return
try: try:
with io.open(encodeFilename(infofn), 'w', 'utf-8') as infof: with io.open(encodeFilename(infofn), 'wb') as infof:
json_info_dict = dict((k, v) for k,v in info_dict.items() if not k in ['urlhandle']) json_info_dict = dict((k, v) for k,v in info_dict.items() if not k in ['urlhandle'])
json.dump(json_info_dict, infof) json.dump(json_info_dict, infof)
except (OSError, IOError): except (OSError, IOError):

View file

@@ -999,7 +999,7 @@ class VimeoIE(InfoExtractor):
video_thumbnail = config["video"]["thumbnail"] video_thumbnail = config["video"]["thumbnail"]
# Extract video description # Extract video description
video_description = get_element_by_id("description", webpage) video_description = get_element_by_attribute("itemprop", "description", webpage)
if video_description: video_description = clean_html(video_description) if video_description: video_description = clean_html(video_description)
else: video_description = '' else: video_description = ''

View file

@@ -201,10 +201,11 @@ def htmlentity_transform(matchobj):
return (u'&%s;' % entity) return (u'&%s;' % entity)
compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
class IDParser(compat_html_parser.HTMLParser): class AttrParser(compat_html_parser.HTMLParser):
"""Modified HTMLParser that isolates a tag with the specified id""" """Modified HTMLParser that isolates a tag with the specified attribute"""
def __init__(self, id): def __init__(self, attribute, value):
self.id = id self.attribute = attribute
self.value = value
self.result = None self.result = None
self.started = False self.started = False
self.depth = {} self.depth = {}
@@ -229,7 +230,7 @@ class IDParser(compat_html_parser.HTMLParser):
attrs = dict(attrs) attrs = dict(attrs)
if self.started: if self.started:
self.find_startpos(None) self.find_startpos(None)
if 'id' in attrs and attrs['id'] == self.id: if self.attribute in attrs and attrs[self.attribute] == self.value:
self.result = [tag] self.result = [tag]
self.started = True self.started = True
self.watch_startpos = True self.watch_startpos = True
@@ -267,8 +268,12 @@ class IDParser(compat_html_parser.HTMLParser):
return '\n'.join(lines).strip() return '\n'.join(lines).strip()
def get_element_by_id(id, html): def get_element_by_id(id, html):
"""Return the content of the tag with the specified id in the passed HTML document""" """Return the content of the tag with the specified ID in the passed HTML document"""
parser = IDParser(id) return get_element_by_attribute("id", id, html)
def get_element_by_attribute(attribute, value, html):
"""Return the content of the tag with the specified attribute in the passed HTML document"""
parser = AttrParser(attribute, value)
try: try:
parser.loads(html) parser.loads(html)
except compat_html_parser.HTMLParseError: except compat_html_parser.HTMLParseError: