Clean up search engine

Notable changes:
1. Prevent excessive engine module imports (see the sketch just before the diff).
2. Replace trivial usage of `join()`.
3. Keep the output text sorted whenever possible.
4. Close handles properly.
5. Print error to stderr, not stdout.
6. Report search job exit code.
7. Print the exception message to stderr if an exception is thrown while
   running a search job (items 6 and 7 are sketched below).
8. Use an XML library to build XML data, with 2-space indentation
   (see the sketch after this list).
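
Item 8's change is in files outside the hunk shown below. As a rough sketch of
the approach, using the standard library's xml.etree.ElementTree with 2-space
indentation (tag and attribute names here are hypothetical):

import xml.etree.ElementTree as ET

# Build the document with the XML library instead of string concatenation.
# Tag/attribute names are hypothetical illustrations only.
root = ET.Element("results")
entry = ET.SubElement(root, "result", {"engine": "example"})
ET.SubElement(entry, "name").text = "Example torrent"

ET.indent(root, space="  ")  # 2 spaces as indentation (Python 3.9+)
print(ET.tostring(root, encoding="unicode"))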

PR #21098.
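
A minimal sketch of items 6 and 7, assuming the search job is launched as a
subprocess (the actual job-running code is not in the hunk shown below, and
the helper name is hypothetical):

import subprocess
import sys

def run_search_job(args: list[str]) -> int:
    try:
        process = subprocess.run(args, check=False)
    except OSError as exc:
        # Item 7: the exception message goes to stderr, not stdout
        print(f"Search job failed: {exc}", file=sys.stderr)
        return 1
    # Item 6: report the job's exit code to the caller
    return process.returncode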
Chocobo1 authored on 2024-07-22 16:51:57 +08:00, committed by GitHub
parent 3c5baac150
commit 69a829dfb0
4 changed files with 168 additions and 198 deletions
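
For item 1, a hedged sketch of caching engine modules so each one is imported
at most once (the "engines" package and get_engine helper are hypothetical;
Python also caches imports in sys.modules, so this only makes the reuse explicit):

import importlib
from types import ModuleType

_engine_modules: dict[str, ModuleType] = {}

def get_engine(name: str) -> ModuleType:
    # Import the engine module on first use, then reuse the cached object
    if name not in _engine_modules:
        _engine_modules[name] = importlib.import_module(f"engines.{name}")
    return _engine_modules[name]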

@@ -1,4 +1,4 @@
-#VERSION: 1.47
+#VERSION: 1.48
 
 # Author:
 # Christophe DUMEZ (chris@qbittorrent.org)
@@ -35,12 +35,12 @@ import os
 import re
 import socket
 import socks
 import sys
 import tempfile
 import urllib.error
 import urllib.parse
 import urllib.request
 from collections.abc import Mapping
-from typing import Any, Dict, Optional
+from typing import Any, Optional
 
 
@@ -59,7 +59,7 @@ def getBrowserUserAgent() -> str:
     return f"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:{nowVersion}.0) Gecko/20100101 Firefox/{nowVersion}.0"
 
 
-headers: Dict[str, Any] = {'User-Agent': getBrowserUserAgent()}
+headers: dict[str, Any] = {'User-Agent': getBrowserUserAgent()}
 
 # SOCKS5 Proxy support
 if "sock_proxy" in os.environ and len(os.environ["sock_proxy"].strip()) > 0:
@@ -91,51 +91,52 @@ def htmlentitydecode(s: str) -> str:
 def retrieve_url(url: str, custom_headers: Mapping[str, Any] = {}) -> str:
     """ Return the content of the url page as a string """
-    req = urllib.request.Request(url, headers={**headers, **custom_headers})
+    request = urllib.request.Request(url, headers={**headers, **custom_headers})
     try:
-        response = urllib.request.urlopen(req)
+        response = urllib.request.urlopen(request)
     except urllib.error.URLError as errno:
-        print(" ".join(("Connection error:", str(errno.reason))))
+        print(f"Connection error: {errno.reason}", file=sys.stderr)
         return ""
-    dat: bytes = response.read()
+    data: bytes = response.read()
     # Check if it is gzipped
-    if dat[:2] == b'\x1f\x8b':
+    if data[:2] == b'\x1f\x8b':
         # Data is gzip encoded, decode it
-        compressedstream = io.BytesIO(dat)
-        gzipper = gzip.GzipFile(fileobj=compressedstream)
-        extracted_data = gzipper.read()
-        dat = extracted_data
-    info = response.info()
+        with io.BytesIO(data) as compressedStream, gzip.GzipFile(fileobj=compressedStream) as gzipper:
+            data = gzipper.read()
+
     charset = 'utf-8'
     try:
-        ignore, charset = info['Content-Type'].split('charset=')
-    except Exception:
+        charset = response.getheader('Content-Type', '').split('charset=', 1)[1]
+    except IndexError:
         pass
-    datStr = dat.decode(charset, 'replace')
-    datStr = htmlentitydecode(datStr)
-    return datStr
+
+    dataStr = data.decode(charset, 'replace')
+    dataStr = htmlentitydecode(dataStr)
+    return dataStr
 
 
 def download_file(url: str, referer: Optional[str] = None) -> str:
     """ Download file at url and write it to a file, return the path to the file and the url """
-    fileHandle, path = tempfile.mkstemp()
-    file = os.fdopen(fileHandle, "wb")
     # Download url
-    req = urllib.request.Request(url, headers=headers)
+    request = urllib.request.Request(url, headers=headers)
     if referer is not None:
-        req.add_header('referer', referer)
-    response = urllib.request.urlopen(req)
-    dat = response.read()
+        request.add_header('referer', referer)
+
+    response = urllib.request.urlopen(request)
+    data = response.read()
     # Check if it is gzipped
-    if dat[:2] == b'\x1f\x8b':
+    if data[:2] == b'\x1f\x8b':
         # Data is gzip encoded, decode it
-        compressedstream = io.BytesIO(dat)
-        gzipper = gzip.GzipFile(fileobj=compressedstream)
-        extracted_data = gzipper.read()
-        dat = extracted_data
+        with io.BytesIO(data) as compressedStream, gzip.GzipFile(fileobj=compressedStream) as gzipper:
+            data = gzipper.read()
+
     # Write it to a file
-    file.write(dat)
-    file.close()
+    fileHandle, path = tempfile.mkstemp()
+    with os.fdopen(fileHandle, "wb") as file:
+        file.write(data)
+
     # return file path
-    return (path + " " + url)
+    return f"{path} {url}"