mirror of
https://github.com/byt3bl33d3r/MITMf.git
synced 2025-07-06 04:52:22 -07:00
Inject.py now tries to detect encoding before parsing HTML with BeautifulSoup
This commit is contained in:
parent
fb41a510f6
commit
27c28e512e
3 changed files with 29 additions and 18 deletions
|
@@ -57,27 +57,37 @@ class Inject(Plugin):
|
|||
|
||||
def response(self, response, request, data):
|
||||
|
||||
encoding = None
|
||||
ip = response.getClientIP()
|
||||
hn = response.getRequestHostname()
|
||||
mime = response.headers['Content-Type']
|
||||
|
||||
try:
|
||||
mime = response.headers['Content-Type']
|
||||
except KeyError:
|
||||
return
|
||||
|
||||
if "charset" in mime:
|
||||
match = re.search('charset=(.*)', mime)
|
||||
if match:
|
||||
encoding = match.group(1).strip().replace('"', "")
|
||||
else:
|
||||
try:
|
||||
encoding = chardet.detect(data)["encoding"]
|
||||
except:
|
||||
pass
|
||||
else:
|
||||
try:
|
||||
encoding = chardet.detect(data)["encoding"]
|
||||
except:
|
||||
pass
|
||||
|
||||
if self._should_inject(ip, hn) and self._ip_filter(ip) and self._host_filter(hn) and (hn not in self.ip) and ("text/html" in mime):
|
||||
|
||||
if "charset" in mime:
|
||||
match = re.search('charset=(.*)', mime)
|
||||
if match:
|
||||
encoding = match.group(1).strip().replace('"', "")
|
||||
else:
|
||||
try:
|
||||
encoding = chardet.detect(data)["encoding"]
|
||||
except:
|
||||
encoding = None
|
||||
if encoding is not None:
|
||||
html = BeautifulSoup(data.decode(encoding, "ignore"), "lxml")
|
||||
else:
|
||||
html = BeautifulSoup(data, "lxml")
|
||||
|
||||
if encoding:
|
||||
html = BeautifulSoup(data.decode(encoding, "ignore"), "lxml")
|
||||
else:
|
||||
html = BeautifulSoup(data, "lxml") # let bs find the encoding
|
||||
|
||||
if html.body:
|
||||
|
||||
if self.html_url:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue