Inject.py now tries to detect encoding before parsing HTML with BeautifulSoup

This commit is contained in:
byt3bl33d3r 2015-08-23 19:42:52 +02:00
parent fb41a510f6
commit 27c28e512e
3 changed files with 29 additions and 18 deletions

View file

@@ -11,6 +11,7 @@
- @mmetince - @mmetince
- @niallmerrigan - @niallmerrigan
- @auraltension - @auraltension
- @HAMIDx9
#Unintentional contributors and/or projects that I stole code from #Unintentional contributors and/or projects that I stole code from

View file

@@ -57,27 +57,37 @@ class Inject(Plugin):
def response(self, response, request, data): def response(self, response, request, data):
encoding = None
ip = response.getClientIP() ip = response.getClientIP()
hn = response.getRequestHostname() hn = response.getRequestHostname()
mime = response.headers['Content-Type']
try:
mime = response.headers['Content-Type']
except KeyError:
return
if "charset" in mime:
match = re.search('charset=(.*)', mime)
if match:
encoding = match.group(1).strip().replace('"', "")
else:
try:
encoding = chardet.detect(data)["encoding"]
except:
pass
else:
try:
encoding = chardet.detect(data)["encoding"]
except:
pass
if self._should_inject(ip, hn) and self._ip_filter(ip) and self._host_filter(hn) and (hn not in self.ip) and ("text/html" in mime): if self._should_inject(ip, hn) and self._ip_filter(ip) and self._host_filter(hn) and (hn not in self.ip) and ("text/html" in mime):
if "charset" in mime: if encoding is not None:
match = re.search('charset=(.*)', mime) html = BeautifulSoup(data.decode(encoding, "ignore"), "lxml")
if match: else:
encoding = match.group(1).strip().replace('"', "") html = BeautifulSoup(data, "lxml")
else:
try:
encoding = chardet.detect(data)["encoding"]
except:
encoding = None
if encoding:
html = BeautifulSoup(data.decode(encoding, "ignore"), "lxml")
else:
html = BeautifulSoup(data, "lxml") # let bs find the encoding
if html.body: if html.body:
if self.html_url: if self.html_url:

View file

@@ -1,7 +1,7 @@
git+git://github.com/kti/python-netfilterqueue git+git://github.com/kti/python-netfilterqueue
pyinotify pyinotify
pycrypto>=2.6 pycrypto
pyasn1>=0.1.7 pyasn1
cryptography cryptography
Pillow Pillow
netaddr netaddr
@@ -23,4 +23,4 @@ python-magic
msgpack-python msgpack-python
requests requests
pypcap pypcap
chardet chardet