From 4f3d77963fa51a1b65e02cb019d16f20dd8a6a04 Mon Sep 17 00:00:00 2001 From: Chocobo1 Date: Mon, 6 Jan 2025 19:05:57 +0800 Subject: [PATCH] Add parameter to control whether to unescape HTML entities Some plugins need the raw data for further processing. Related: #22074. PR #22106. --- src/searchengine/nova3/helpers.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/searchengine/nova3/helpers.py b/src/searchengine/nova3/helpers.py index abf201439..47db27bcc 100644 --- a/src/searchengine/nova3/helpers.py +++ b/src/searchengine/nova3/helpers.py @@ -1,4 +1,4 @@ -#VERSION: 1.50 +#VERSION: 1.51 # Author: # Christophe DUMEZ (chris@qbittorrent.org) @@ -77,7 +77,7 @@ if "sock_proxy" in os.environ and len(os.environ["sock_proxy"].strip()) > 0: htmlentitydecode = html.unescape -def retrieve_url(url: str, custom_headers: Mapping[str, Any] = {}, request_data: Optional[Any] = None, ssl_context: Optional[ssl.SSLContext] = None) -> str: +def retrieve_url(url: str, custom_headers: Mapping[str, Any] = {}, request_data: Optional[Any] = None, ssl_context: Optional[ssl.SSLContext] = None, unescape_html_entities: bool = True) -> str: """ Return the content of the url page as a string """ request = urllib.request.Request(url, request_data, {**headers, **custom_headers}) @@ -101,7 +101,10 @@ def retrieve_url(url: str, custom_headers: Mapping[str, Any] = {}, request_data: pass dataStr = data.decode(charset, 'replace') - dataStr = htmlentitydecode(dataStr) + + if unescape_html_entities: + dataStr = html.unescape(dataStr) + return dataStr