mirror of
https://github.com/byt3bl33d3r/MITMf.git
synced 2025-07-07 13:32:18 -07:00
Inject.py now tries to detect encoding before parsing HTML with BeautifulSoup
This commit is contained in:
parent
fb41a510f6
commit
27c28e512e
3 changed files with 29 additions and 18 deletions
|
@@ -11,6 +11,7 @@
|
||||||
- @mmetince
|
- @mmetince
|
||||||
- @niallmerrigan
|
- @niallmerrigan
|
||||||
- @auraltension
|
- @auraltension
|
||||||
|
- @HAMIDx9
|
||||||
|
|
||||||
#Unintentional contributors and/or projects that I stole code from
|
#Unintentional contributors and/or projects that I stole code from
|
||||||
|
|
||||||
|
|
|
@@ -57,27 +57,37 @@ class Inject(Plugin):
|
||||||
|
|
||||||
def response(self, response, request, data):
|
def response(self, response, request, data):
|
||||||
|
|
||||||
|
encoding = None
|
||||||
ip = response.getClientIP()
|
ip = response.getClientIP()
|
||||||
hn = response.getRequestHostname()
|
hn = response.getRequestHostname()
|
||||||
mime = response.headers['Content-Type']
|
|
||||||
|
try:
|
||||||
|
mime = response.headers['Content-Type']
|
||||||
|
except KeyError:
|
||||||
|
return
|
||||||
|
|
||||||
|
if "charset" in mime:
|
||||||
|
match = re.search('charset=(.*)', mime)
|
||||||
|
if match:
|
||||||
|
encoding = match.group(1).strip().replace('"', "")
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
encoding = chardet.detect(data)["encoding"]
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
encoding = chardet.detect(data)["encoding"]
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
if self._should_inject(ip, hn) and self._ip_filter(ip) and self._host_filter(hn) and (hn not in self.ip) and ("text/html" in mime):
|
if self._should_inject(ip, hn) and self._ip_filter(ip) and self._host_filter(hn) and (hn not in self.ip) and ("text/html" in mime):
|
||||||
|
|
||||||
if "charset" in mime:
|
if encoding is not None:
|
||||||
match = re.search('charset=(.*)', mime)
|
html = BeautifulSoup(data.decode(encoding, "ignore"), "lxml")
|
||||||
if match:
|
else:
|
||||||
encoding = match.group(1).strip().replace('"', "")
|
html = BeautifulSoup(data, "lxml")
|
||||||
else:
|
|
||||||
try:
|
|
||||||
encoding = chardet.detect(data)["encoding"]
|
|
||||||
except:
|
|
||||||
encoding = None
|
|
||||||
|
|
||||||
if encoding:
|
|
||||||
html = BeautifulSoup(data.decode(encoding, "ignore"), "lxml")
|
|
||||||
else:
|
|
||||||
html = BeautifulSoup(data, "lxml") # let bs find the encoding
|
|
||||||
|
|
||||||
if html.body:
|
if html.body:
|
||||||
|
|
||||||
if self.html_url:
|
if self.html_url:
|
||||||
|
|
|
@@ -1,7 +1,7 @@
|
||||||
git+git://github.com/kti/python-netfilterqueue
|
git+git://github.com/kti/python-netfilterqueue
|
||||||
pyinotify
|
pyinotify
|
||||||
pycrypto>=2.6
|
pycrypto
|
||||||
pyasn1>=0.1.7
|
pyasn1
|
||||||
cryptography
|
cryptography
|
||||||
Pillow
|
Pillow
|
||||||
netaddr
|
netaddr
|
||||||
|
@@ -23,4 +23,4 @@ python-magic
|
||||||
msgpack-python
|
msgpack-python
|
||||||
requests
|
requests
|
||||||
pypcap
|
pypcap
|
||||||
chardet
|
chardet
|
Loading…
Add table
Add a link
Reference in a new issue