mirror of
https://github.com/qbittorrent/qBittorrent
synced 2025-08-19 04:49:47 -07:00
Merge pull request #22070 from Chocobo1/py_html_decode
Improve Search engine
This commit is contained in:
commit
e740a42366
1 changed file with 9 additions and 21 deletions
|
@ -1,4 +1,4 @@
|
||||||
#VERSION: 1.49
|
#VERSION: 1.50
|
||||||
|
|
||||||
# Author:
|
# Author:
|
||||||
# Christophe DUMEZ (chris@qbittorrent.org)
|
# Christophe DUMEZ (chris@qbittorrent.org)
|
||||||
|
@ -29,12 +29,13 @@
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
import gzip
|
import gzip
|
||||||
import html.entities
|
import html
|
||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import socket
|
import socket
|
||||||
import socks
|
import socks
|
||||||
|
import ssl
|
||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
import urllib.error
|
import urllib.error
|
||||||
|
@ -72,29 +73,16 @@ if "sock_proxy" in os.environ and len(os.environ["sock_proxy"].strip()) > 0:
|
||||||
socket.socket = socks.socksocket # type: ignore[misc]
|
socket.socket = socks.socksocket # type: ignore[misc]
|
||||||
|
|
||||||
|
|
||||||
def htmlentitydecode(s: str) -> str:
|
# This is only provided for backward compatibility, new code should not use it
|
||||||
# First convert alpha entities (such as &eacute;)
|
htmlentitydecode = html.unescape
|
||||||
# (Inspired from http://mail.python.org/pipermail/python-list/2007-June/443813.html)
|
|
||||||
def entity2char(m: re.Match[str]) -> str:
|
|
||||||
entity = m.group(1)
|
|
||||||
if entity in html.entities.name2codepoint:
|
|
||||||
return chr(html.entities.name2codepoint[entity])
|
|
||||||
return " " # Unknown entity: We replace with a space.
|
|
||||||
t = re.sub('&(%s);' % '|'.join(html.entities.name2codepoint), entity2char, s)
|
|
||||||
|
|
||||||
# Then convert numerical entities (such as &#233;)
|
|
||||||
t = re.sub(r'&#(\d+);', lambda x: chr(int(x.group(1))), t)
|
|
||||||
|
|
||||||
# Then convert hexa entities (such as &#xE9;)
|
|
||||||
return re.sub(r'&#x(\w+);', lambda x: chr(int(x.group(1), 16)), t)
|
|
||||||
|
|
||||||
|
|
||||||
def retrieve_url(url: str, custom_headers: Mapping[str, Any] = {}, request_data: Optional[Any] = None) -> str:
|
def retrieve_url(url: str, custom_headers: Mapping[str, Any] = {}, request_data: Optional[Any] = None, ssl_context: Optional[ssl.SSLContext] = None) -> str:
|
||||||
""" Return the content of the url page as a string """
|
""" Return the content of the url page as a string """
|
||||||
|
|
||||||
request = urllib.request.Request(url, request_data, {**headers, **custom_headers})
|
request = urllib.request.Request(url, request_data, {**headers, **custom_headers})
|
||||||
try:
|
try:
|
||||||
response = urllib.request.urlopen(request)
|
response = urllib.request.urlopen(request, context=ssl_context)
|
||||||
except urllib.error.URLError as errno:
|
except urllib.error.URLError as errno:
|
||||||
print(f"Connection error: {errno.reason}", file=sys.stderr)
|
print(f"Connection error: {errno.reason}", file=sys.stderr)
|
||||||
return ""
|
return ""
|
||||||
|
@ -117,14 +105,14 @@ def retrieve_url(url: str, custom_headers: Mapping[str, Any] = {}, request_data:
|
||||||
return dataStr
|
return dataStr
|
||||||
|
|
||||||
|
|
||||||
def download_file(url: str, referer: Optional[str] = None) -> str:
|
def download_file(url: str, referer: Optional[str] = None, ssl_context: Optional[ssl.SSLContext] = None) -> str:
|
||||||
""" Download file at url and write it to a file, return the path to the file and the url """
|
""" Download file at url and write it to a file, return the path to the file and the url """
|
||||||
|
|
||||||
# Download url
|
# Download url
|
||||||
request = urllib.request.Request(url, headers=headers)
|
request = urllib.request.Request(url, headers=headers)
|
||||||
if referer is not None:
|
if referer is not None:
|
||||||
request.add_header('referer', referer)
|
request.add_header('referer', referer)
|
||||||
response = urllib.request.urlopen(request)
|
response = urllib.request.urlopen(request, context=ssl_context)
|
||||||
data = response.read()
|
data = response.read()
|
||||||
|
|
||||||
# Check if it is gzipped
|
# Check if it is gzipped
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue