From 4f3d77963fa51a1b65e02cb019d16f20dd8a6a04 Mon Sep 17 00:00:00 2001
From: Chocobo1 <Chocobo1@users.noreply.github.com>
Date: Mon, 6 Jan 2025 19:05:57 +0800
Subject: [PATCH] Add parameter to control whether to unescape HTML entities

Some plugins need the raw data for further processing.
Related: #22074.

PR #22106.
---
 src/searchengine/nova3/helpers.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/searchengine/nova3/helpers.py b/src/searchengine/nova3/helpers.py
index abf201439..47db27bcc 100644
--- a/src/searchengine/nova3/helpers.py
+++ b/src/searchengine/nova3/helpers.py
@@ -1,4 +1,4 @@
-#VERSION: 1.50
+#VERSION: 1.51
 
 # Author:
 #  Christophe DUMEZ (chris@qbittorrent.org)
@@ -77,7 +77,7 @@ if "sock_proxy" in os.environ and len(os.environ["sock_proxy"].strip()) > 0:
 htmlentitydecode = html.unescape
 
 
-def retrieve_url(url: str, custom_headers: Mapping[str, Any] = {}, request_data: Optional[Any] = None, ssl_context: Optional[ssl.SSLContext] = None) -> str:
+def retrieve_url(url: str, custom_headers: Mapping[str, Any] = {}, request_data: Optional[Any] = None, ssl_context: Optional[ssl.SSLContext] = None, unescape_html_entities: bool = True) -> str:
     """ Return the content of the url page as a string """
 
     request = urllib.request.Request(url, request_data, {**headers, **custom_headers})
@@ -101,7 +101,10 @@ def retrieve_url(url: str, custom_headers: Mapping[str, Any] = {}, request_data:
         pass
 
     dataStr = data.decode(charset, 'replace')
-    dataStr = htmlentitydecode(dataStr)
+
+    if unescape_html_entities:
+        dataStr = html.unescape(dataStr)
+
     return dataStr