diff --git a/plugins/inject.py b/plugins/inject.py index 327672c..a005215 100644 --- a/plugins/inject.py +++ b/plugins/inject.py @@ -18,6 +18,8 @@ import time import sys +import re +import chardet from bs4 import BeautifulSoup from plugins.plugin import Plugin @@ -52,6 +54,7 @@ class Inject(Plugin): self.dtable = {} self.count = 0 + def response(self, response, request, data): ip = response.getClientIP() @@ -59,7 +62,22 @@ class Inject(Plugin): mime = response.headers['Content-Type'] if self._should_inject(ip, hn) and self._ip_filter(ip) and self._host_filter(hn) and (hn not in self.ip) and ("text/html" in mime): - html = BeautifulSoup(data, "lxml") + + if "charset" in mime: + match = re.search('charset=(.*)', mime) + if match: + encoding = match.group(1).strip().replace('"', "") + else: + try: + encoding = chardet.detect(data)["encoding"] + except: + encoding = None + + if encoding: + html = BeautifulSoup(data.decode(encoding, "ignore"), "lxml") + else: + html = BeautifulSoup(data, "lxml") # let bs find the encoding + if html.body: if self.html_url: diff --git a/requirements.txt b/requirements.txt index 39e3952..9cc54f5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,4 +21,5 @@ capstone python-magic msgpack-python requests -pypcap \ No newline at end of file +pypcap +chardet