- Should completely fix Unicode problems for all search engines

This commit is contained in:
Christophe Dumez 2009-03-26 20:14:05 +00:00
parent a2e9210665
commit 9e46c6c047
7 changed files with 18 additions and 19 deletions

View file

@@ -54,6 +54,6 @@ def retrieve_url(url):
ignore, charset = info['Content-Type'].split('charset=')
except:
pass
-dat = dat.decode(charset)
+dat = dat.decode(charset, 'replace')
dat = htmlentitydecode(dat)
return dat.encode('utf-8', 'replace')