Bump requests from 2.28.1 to 2.28.2 (#1968)

* Bump requests from 2.28.1 to 2.28.2

Bumps [requests](https://github.com/psf/requests) from 2.28.1 to 2.28.2.
- [Release notes](https://github.com/psf/requests/releases)
- [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md)
- [Commits](https://github.com/psf/requests/compare/v2.28.1...v2.28.2)

---
updated-dependencies:
- dependency-name: requests
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Update requests==2.28.2

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com>

[skip ci]
dependabot[bot] 2023-03-02 20:53:15 -08:00 committed by GitHub
parent 70e09582da
commit cc78f17be5
20 changed files with 527 additions and 302 deletions

View file

@@ -21,14 +21,8 @@ at <https://github.com/Ousret/charset_normalizer>.
 """
 import logging
-from .api import from_bytes, from_fp, from_path, normalize
-from .legacy import (
-    CharsetDetector,
-    CharsetDoctor,
-    CharsetNormalizerMatch,
-    CharsetNormalizerMatches,
-    detect,
-)
+from .api import from_bytes, from_fp, from_path
+from .legacy import detect
 from .models import CharsetMatch, CharsetMatches
 from .utils import set_logging_handler
 from .version import VERSION, __version__
@@ -37,14 +31,9 @@ __all__ = (
     "from_fp",
     "from_path",
     "from_bytes",
-    "normalize",
     "detect",
     "CharsetMatch",
     "CharsetMatches",
-    "CharsetNormalizerMatch",
-    "CharsetNormalizerMatches",
-    "CharsetDetector",
-    "CharsetDoctor",
     "__version__",
     "VERSION",
     "set_logging_handler",

View file

@@ -1,7 +1,5 @@
 import logging
-import warnings
 from os import PathLike
-from os.path import basename, splitext
 from typing import Any, BinaryIO, List, Optional, Set
 
 from .cd import (
@@ -41,11 +39,12 @@ def from_bytes(
     cp_exclusion: Optional[List[str]] = None,
     preemptive_behaviour: bool = True,
     explain: bool = False,
+    language_threshold: float = 0.1,
 ) -> CharsetMatches:
     """
     Given a raw bytes sequence, return the best possibles charset usable to render str objects.
     If there is no results, it is a strong indicator that the source is binary/not text.
-    By default, the process will extract 5 blocs of 512o each to assess the mess and coherence of a given sequence.
+    By default, the process will extract 5 blocks of 512o each to assess the mess and coherence of a given sequence.
     And will give up a particular code page after 20% of measured mess. Those criteria are customizable at will.
 
     The preemptive behavior DOES NOT replace the traditional detection workflow, it prioritize a particular code page
@@ -197,7 +196,14 @@ def from_bytes(
         if encoding_iana in {"utf_16", "utf_32"} and not bom_or_sig_available:
             logger.log(
                 TRACE,
-                "Encoding %s wont be tested as-is because it require a BOM. Will try some sub-encoder LE/BE.",
+                "Encoding %s won't be tested as-is because it require a BOM. Will try some sub-encoder LE/BE.",
+                encoding_iana,
+            )
+            continue
+        if encoding_iana in {"utf_7"} and not bom_or_sig_available:
+            logger.log(
+                TRACE,
+                "Encoding %s won't be tested as-is because detection is unreliable without BOM/SIG.",
                 encoding_iana,
             )
             continue
@@ -297,7 +303,13 @@ def from_bytes(
             ):
                 md_chunks.append(chunk)
 
-                md_ratios.append(mess_ratio(chunk, threshold))
+                md_ratios.append(
+                    mess_ratio(
+                        chunk,
+                        threshold,
+                        explain is True and 1 <= len(cp_isolation) <= 2,
+                    )
+                )
 
                 if md_ratios[-1] >= threshold:
                     early_stop_count += 1
@@ -389,7 +401,9 @@ def from_bytes(
         if encoding_iana != "ascii":
             for chunk in md_chunks:
                 chunk_languages = coherence_ratio(
-                    chunk, 0.1, ",".join(target_languages) if target_languages else None
+                    chunk,
+                    language_threshold,
+                    ",".join(target_languages) if target_languages else None,
                 )
 
                 cd_ratios.append(chunk_languages)
@@ -491,6 +505,7 @@ def from_fp(
     cp_exclusion: Optional[List[str]] = None,
     preemptive_behaviour: bool = True,
     explain: bool = False,
+    language_threshold: float = 0.1,
 ) -> CharsetMatches:
     """
     Same thing than the function from_bytes but using a file pointer that is already ready.
@@ -505,6 +520,7 @@ def from_fp(
         cp_exclusion,
         preemptive_behaviour,
         explain,
+        language_threshold,
     )
@@ -517,6 +533,7 @@ def from_path(
     cp_exclusion: Optional[List[str]] = None,
     preemptive_behaviour: bool = True,
     explain: bool = False,
+    language_threshold: float = 0.1,
 ) -> CharsetMatches:
     """
     Same thing than the function from_bytes but with one extra step. Opening and reading given file path in binary mode.
@@ -532,53 +549,5 @@ def from_path(
         cp_exclusion,
         preemptive_behaviour,
         explain,
+        language_threshold,
     )
-
-
-def normalize(
-    path: "PathLike[Any]",
-    steps: int = 5,
-    chunk_size: int = 512,
-    threshold: float = 0.20,
-    cp_isolation: Optional[List[str]] = None,
-    cp_exclusion: Optional[List[str]] = None,
-    preemptive_behaviour: bool = True,
-) -> CharsetMatch:
-    """
-    Take a (text-based) file path and try to create another file next to it, this time using UTF-8.
-    """
-    warnings.warn(
-        "normalize is deprecated and will be removed in 3.0",
-        DeprecationWarning,
-    )
-
-    results = from_path(
-        path,
-        steps,
-        chunk_size,
-        threshold,
-        cp_isolation,
-        cp_exclusion,
-        preemptive_behaviour,
-    )
-
-    filename = basename(path)
-    target_extensions = list(splitext(filename))
-
-    if len(results) == 0:
-        raise IOError(
-            'Unable to normalize "{}", no encoding charset seems to fit.'.format(
-                filename
-            )
-        )
-
-    result = results.best()
-
-    target_extensions[0] += "-" + result.encoding  # type: ignore
-
-    with open(
-        "{}".format(str(path).replace(filename, "".join(target_extensions))), "wb"
-    ) as fp:
-        fp.write(result.output())  # type: ignore
-
-    return result  # type: ignore

View file

@@ -1,6 +1,8 @@
 # -*- coding: utf-8 -*-
 from typing import Dict, List
 
+# Language label that contain the em dash "—"
+# character are to be considered alternative seq to origin
 FREQUENCIES: Dict[str, List[str]] = {
     "English": [
         "e",
@@ -30,6 +32,34 @@ FREQUENCIES: Dict[str, List[str]] = {
         "z",
         "q",
     ],
+    "English—": [
+        "e",
+        "a",
+        "t",
+        "i",
+        "o",
+        "n",
+        "s",
+        "r",
+        "h",
+        "l",
+        "d",
+        "c",
+        "m",
+        "u",
+        "f",
+        "p",
+        "g",
+        "w",
+        "b",
+        "y",
+        "v",
+        "k",
+        "j",
+        "x",
+        "z",
+        "q",
+    ],
     "German": [
         "e",
         "n",
@@ -226,33 +256,303 @@ FREQUENCIES: Dict[str, List[str]] = {
         "ж",
         "ц",
     ],
+    # Jap-Kanji
     "Japanese": [
 […reworked Kanji list, plus new "Japanese—" (# Jap-Katakana) and "Japanese——" (# Jap-Hiragana) lists; the individual characters were not preserved in this view…]
     ],
     "Portuguese": [
         "a",
@@ -340,6 +640,77 @@ FREQUENCIES: Dict[str, List[str]] = {
 […71 entries appended to an existing character list; the characters were not preserved in this view…]
     ],
     "Ukrainian": [
         "о",
@@ -956,34 +1327,6 @@ FREQUENCIES: Dict[str, List[str]] = {
         "ö",
         "y",
     ],
-    "Simple English": [
-        "e",
-        "a",
-        "t",
-        "i",
-        "o",
-        "n",
-        "s",
-        "r",
-        "h",
-        "l",
-        "d",
-        "c",
-        "m",
-        "u",
-        "f",
-        "p",
-        "g",
-        "w",
-        "b",
-        "y",
-        "v",
-        "k",
-        "j",
-        "x",
-        "z",
-        "q",
-    ],
     "Thai": [
@@ -1066,31 +1409,6 @@ FREQUENCIES: Dict[str, List[str]] = {
     ],
-    "Classical Chinese": [
-        […23 entries; the characters were not preserved in this view…]
-    ],
     "Kazakh": [
         "а",
         "ы",

View file

@@ -105,7 +105,7 @@ def mb_encoding_languages(iana_name: str) -> List[str]:
     ):
         return ["Japanese"]
     if iana_name.startswith("gb") or iana_name in ZH_NAMES:
-        return ["Chinese", "Classical Chinese"]
+        return ["Chinese"]
     if iana_name.startswith("iso2022_kr") or iana_name in KO_NAMES:
         return ["Korean"]
@@ -179,22 +179,45 @@ def characters_popularity_compare(
     character_approved_count: int = 0
     FREQUENCIES_language_set = set(FREQUENCIES[language])
 
-    for character in ordered_characters:
+    ordered_characters_count: int = len(ordered_characters)
+    target_language_characters_count: int = len(FREQUENCIES[language])
+
+    large_alphabet: bool = target_language_characters_count > 26
+
+    for character, character_rank in zip(
+        ordered_characters, range(0, ordered_characters_count)
+    ):
         if character not in FREQUENCIES_language_set:
             continue
 
+        character_rank_in_language: int = FREQUENCIES[language].index(character)
+        expected_projection_ratio: float = (
+            target_language_characters_count / ordered_characters_count
+        )
+        character_rank_projection: int = int(character_rank * expected_projection_ratio)
+
+        if (
+            large_alphabet is False
+            and abs(character_rank_projection - character_rank_in_language) > 4
+        ):
+            continue
+
+        if (
+            large_alphabet is True
+            and abs(character_rank_projection - character_rank_in_language)
+            < target_language_characters_count / 3
+        ):
+            character_approved_count += 1
+            continue
+
         characters_before_source: List[str] = FREQUENCIES[language][
-            0 : FREQUENCIES[language].index(character)
+            0:character_rank_in_language
         ]
         characters_after_source: List[str] = FREQUENCIES[language][
-            FREQUENCIES[language].index(character) :
+            character_rank_in_language:
         ]
-        characters_before: List[str] = ordered_characters[
-            0 : ordered_characters.index(character)
-        ]
-        characters_after: List[str] = ordered_characters[
-            ordered_characters.index(character) :
-        ]
+        characters_before: List[str] = ordered_characters[0:character_rank]
+        characters_after: List[str] = ordered_characters[character_rank:]
 
         before_match_count: int = len(
             set(characters_before) & set(characters_before_source)
@@ -289,6 +312,33 @@ def merge_coherence_ratios(results: List[CoherenceMatches]) -> CoherenceMatches:
     return sorted(merge, key=lambda x: x[1], reverse=True)
 
 
+def filter_alt_coherence_matches(results: CoherenceMatches) -> CoherenceMatches:
+    """
+    We shall NOT return "English—" in CoherenceMatches because it is an alternative
+    of "English". This function only keeps the best match and remove the em-dash in it.
+    """
+    index_results: Dict[str, List[float]] = dict()
+
+    for result in results:
+        language, ratio = result
+        no_em_name: str = language.replace("—", "")
+
+        if no_em_name not in index_results:
+            index_results[no_em_name] = []
+
+        index_results[no_em_name].append(ratio)
+
+    if any(len(index_results[e]) > 1 for e in index_results):
+        filtered_results: CoherenceMatches = []
+
+        for language in index_results:
+            filtered_results.append((language, max(index_results[language])))
+
+        return filtered_results
+
+    return results
+
+
 @lru_cache(maxsize=2048)
 def coherence_ratio(
     decoded_sequence: str, threshold: float = 0.1, lg_inclusion: Optional[str] = None
@@ -336,4 +386,6 @@ def coherence_ratio(
         if sufficient_match_count >= 3:
             break
 
-    return sorted(results, key=lambda x: x[1], reverse=True)
+    return sorted(
+        filter_alt_coherence_matches(results), key=lambda x: x[1], reverse=True
+    )

View file

@@ -1,15 +1,12 @@
 import argparse
 import sys
 from json import dumps
-from os.path import abspath
+from os.path import abspath, basename, dirname, join, realpath
 from platform import python_version
 from typing import List, Optional
+from unicodedata import unidata_version
 
-try:
-    from unicodedata2 import unidata_version
-except ImportError:
-    from unicodedata import unidata_version
+import charset_normalizer.md as md_module
 
 from charset_normalizer import from_fp
 from charset_normalizer.models import CliDetectionResult
 from charset_normalizer.version import __version__
@@ -124,8 +121,11 @@ def cli_detect(argv: Optional[List[str]] = None) -> int:
     parser.add_argument(
         "--version",
         action="version",
-        version="Charset-Normalizer {} - Python {} - Unicode {}".format(
-            __version__, python_version(), unidata_version
+        version="Charset-Normalizer {} - Python {} - Unicode {} - SpeedUp {}".format(
+            __version__,
+            python_version(),
+            unidata_version,
+            "OFF" if md_module.__file__.lower().endswith(".py") else "ON",
         ),
         help="Show version information and exit.",
     )
@@ -234,7 +234,10 @@ def cli_detect(argv: Optional[List[str]] = None) -> int:
                 my_file.close()
             continue
 
-        o_: List[str] = my_file.name.split(".")
+        dir_path = dirname(realpath(my_file.name))
+        file_name = basename(realpath(my_file.name))
+
+        o_: List[str] = file_name.split(".")
 
         if args.replace is False:
             o_.insert(-1, best_guess.encoding)
@@ -255,7 +258,7 @@ def cli_detect(argv: Optional[List[str]] = None) -> int:
                 continue
 
             try:
-                x_[0].unicode_path = abspath("./{}".format(".".join(o_)))
+                x_[0].unicode_path = join(dir_path, ".".join(o_))
 
                 with open(x_[0].unicode_path, "w", encoding="utf-8") as fp:
                     fp.write(str(best_guess))

View file

@@ -489,9 +489,7 @@ COMMON_SAFE_ASCII_CHARACTERS: Set[str] = {
 KO_NAMES: Set[str] = {"johab", "cp949", "euc_kr"}
 ZH_NAMES: Set[str] = {"big5", "cp950", "big5hkscs", "hz"}
 
-NOT_PRINTABLE_PATTERN = re_compile(r"[0-9\W\n\r\t]+")
-
 LANGUAGE_SUPPORTED_COUNT: int = len(FREQUENCIES)
 
-# Logging LEVEL bellow DEBUG
+# Logging LEVEL below DEBUG
 TRACE: int = 5

View file

@@ -1,9 +1,7 @@
-import warnings
 from typing import Dict, Optional, Union
 
-from .api import from_bytes, from_fp, from_path, normalize
+from .api import from_bytes
 from .constant import CHARDET_CORRESPONDENCE
-from .models import CharsetMatch, CharsetMatches
 
 
 def detect(byte_str: bytes) -> Dict[str, Optional[Union[str, float]]]:
@@ -43,53 +41,3 @@ def detect(byte_str: bytes) -> Dict[str, Optional[Union[str, float]]]:
         "language": language,
         "confidence": confidence,
     }
-
-
-class CharsetNormalizerMatch(CharsetMatch):
-    pass
-
-
-class CharsetNormalizerMatches(CharsetMatches):
-    @staticmethod
-    def from_fp(*args, **kwargs):  # type: ignore
-        warnings.warn(  # pragma: nocover
-            "staticmethod from_fp, from_bytes, from_path and normalize are deprecated "
-            "and scheduled to be removed in 3.0",
-            DeprecationWarning,
-        )
-        return from_fp(*args, **kwargs)  # pragma: nocover
-
-    @staticmethod
-    def from_bytes(*args, **kwargs):  # type: ignore
-        warnings.warn(  # pragma: nocover
-            "staticmethod from_fp, from_bytes, from_path and normalize are deprecated "
-            "and scheduled to be removed in 3.0",
-            DeprecationWarning,
-        )
-        return from_bytes(*args, **kwargs)  # pragma: nocover
-
-    @staticmethod
-    def from_path(*args, **kwargs):  # type: ignore
-        warnings.warn(  # pragma: nocover
-            "staticmethod from_fp, from_bytes, from_path and normalize are deprecated "
-            "and scheduled to be removed in 3.0",
-            DeprecationWarning,
-        )
-        return from_path(*args, **kwargs)  # pragma: nocover
-
-    @staticmethod
-    def normalize(*args, **kwargs):  # type: ignore
-        warnings.warn(  # pragma: nocover
-            "staticmethod from_fp, from_bytes, from_path and normalize are deprecated "
-            "and scheduled to be removed in 3.0",
-            DeprecationWarning,
-        )
-        return normalize(*args, **kwargs)  # pragma: nocover
-
-
-class CharsetDetector(CharsetNormalizerMatches):
-    pass
-
-
-class CharsetDoctor(CharsetNormalizerMatches):
-    pass

View file

@@ -1,7 +1,12 @@
 from functools import lru_cache
+from logging import getLogger
 from typing import List, Optional
 
-from .constant import COMMON_SAFE_ASCII_CHARACTERS, UNICODE_SECONDARY_RANGE_KEYWORD
+from .constant import (
+    COMMON_SAFE_ASCII_CHARACTERS,
+    TRACE,
+    UNICODE_SECONDARY_RANGE_KEYWORD,
+)
 from .utils import (
     is_accentuated,
     is_ascii,
@@ -123,7 +128,7 @@ class TooManyAccentuatedPlugin(MessDetectorPlugin):
     @property
     def ratio(self) -> float:
-        if self._character_count == 0:
+        if self._character_count == 0 or self._character_count < 8:
             return 0.0
 
         ratio_of_accentuation: float = self._accentuated_count / self._character_count
         return ratio_of_accentuation if ratio_of_accentuation >= 0.35 else 0.0
@@ -547,7 +552,20 @@ def mess_ratio(
             break
 
     if debug:
+        logger = getLogger("charset_normalizer")
+        logger.log(
+            TRACE,
+            "Mess-detector extended-analysis start. "
+            f"intermediary_mean_mess_ratio_calc={intermediary_mean_mess_ratio_calc} mean_mess_ratio={mean_mess_ratio} "
+            f"maximum_threshold={maximum_threshold}",
+        )
+
+        if len(decoded_sequence) > 16:
+            logger.log(TRACE, f"Starting with: {decoded_sequence[:16]}")
+            logger.log(TRACE, f"Ending with: {decoded_sequence[-16::]}")
+
         for dt in detectors:  # pragma: nocover
-            print(dt.__class__, dt.ratio)
+            logger.log(TRACE, f"{dt.__class__}: {dt.ratio}")
 
     return round(mean_mess_ratio, 3)

View file

@@ -1,22 +1,9 @@
-import warnings
-from collections import Counter
 from encodings.aliases import aliases
 from hashlib import sha256
 from json import dumps
-from re import sub
-from typing import (
-    Any,
-    Counter as TypeCounter,
-    Dict,
-    Iterator,
-    List,
-    Optional,
-    Tuple,
-    Union,
-)
+from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
 
-from .constant import NOT_PRINTABLE_PATTERN, TOO_BIG_SEQUENCE
-from .md import mess_ratio
+from .constant import TOO_BIG_SEQUENCE
 from .utils import iana_name, is_multi_byte_encoding, unicode_range
@@ -65,7 +52,7 @@ class CharsetMatch:
         chaos_difference: float = abs(self.chaos - other.chaos)
         coherence_difference: float = abs(self.coherence - other.coherence)
 
-        # Bellow 1% difference --> Use Coherence
+        # Below 1% difference --> Use Coherence
         if chaos_difference < 0.01 and coherence_difference > 0.02:
             # When having a tough decision, use the result that decoded as many multi-byte as possible.
             if chaos_difference == 0.0 and self.coherence == other.coherence:
@@ -78,45 +65,6 @@ class CharsetMatch:
     def multi_byte_usage(self) -> float:
         return 1.0 - len(str(self)) / len(self.raw)
 
-    @property
-    def chaos_secondary_pass(self) -> float:
-        """
-        Check once again chaos in decoded text, except this time, with full content.
-        Use with caution, this can be very slow.
-        Notice: Will be removed in 3.0
-        """
-        warnings.warn(
-            "chaos_secondary_pass is deprecated and will be removed in 3.0",
-            DeprecationWarning,
-        )
-        return mess_ratio(str(self), 1.0)
-
-    @property
-    def coherence_non_latin(self) -> float:
-        """
-        Coherence ratio on the first non-latin language detected if ANY.
-        Notice: Will be removed in 3.0
-        """
-        warnings.warn(
-            "coherence_non_latin is deprecated and will be removed in 3.0",
-            DeprecationWarning,
-        )
-        return 0.0
-
-    @property
-    def w_counter(self) -> TypeCounter[str]:
-        """
-        Word counter instance on decoded text.
-        Notice: Will be removed in 3.0
-        """
-        warnings.warn(
-            "w_counter is deprecated and will be removed in 3.0", DeprecationWarning
-        )
-
-        string_printable_only = sub(NOT_PRINTABLE_PATTERN, " ", str(self).lower())
-
-        return Counter(string_printable_only.split())
-
     def __str__(self) -> str:
         # Lazy Str Loading
         if self._string is None:
@@ -252,18 +200,6 @@ class CharsetMatch:
         """
         return [self._encoding] + [m.encoding for m in self._leaves]
 
-    def first(self) -> "CharsetMatch":
-        """
-        Kept for BC reasons. Will be removed in 3.0.
-        """
-        return self
-
-    def best(self) -> "CharsetMatch":
-        """
-        Kept for BC reasons. Will be removed in 3.0.
-        """
-        return self
-
     def output(self, encoding: str = "utf_8") -> bytes:
         """
         Method to get re-encoded bytes payload using given target encoding. Default to UTF-8.

View file

@@ -1,12 +1,6 @@
-try:
-    # WARNING: unicodedata2 support is going to be removed in 3.0
-    # Python is quickly catching up.
-    import unicodedata2 as unicodedata
-except ImportError:
-    import unicodedata  # type: ignore[no-redef]
-
 import importlib
 import logging
+import unicodedata
 from codecs import IncrementalDecoder
 from encodings.aliases import aliases
 from functools import lru_cache
@@ -402,7 +396,7 @@ def cut_sequence_chunks(
         # multi-byte bad cutting detector and adjustment
         # not the cleanest way to perform that fix but clever enough for now.
-        if is_multi_byte_decoder and i > 0 and sequences[i] >= 0x80:
+        if is_multi_byte_decoder and i > 0:
             chunk_partial_size_chk: int = min(chunk_size, 16)

View file

@@ -2,5 +2,5 @@
 Expose version
 """
 
-__version__ = "2.1.1"
+__version__ = "3.0.1"
 VERSION = __version__.split(".")

View file

@@ -80,8 +80,8 @@ def check_compatibility(urllib3_version, chardet_version, charset_normalizer_ver
     elif charset_normalizer_version:
         major, minor, patch = charset_normalizer_version.split(".")[:3]
         major, minor, patch = int(major), int(minor), int(patch)
-        # charset_normalizer >= 2.0.0 < 3.0.0
-        assert (2, 0, 0) <= (major, minor, patch) < (3, 0, 0)
+        # charset_normalizer >= 2.0.0 < 4.0.0
+        assert (2, 0, 0) <= (major, minor, patch) < (4, 0, 0)
     else:
         raise Exception("You need either charset_normalizer or chardet installed")

View file

@@ -5,10 +5,10 @@
 __title__ = "requests"
 __description__ = "Python HTTP for Humans."
 __url__ = "https://requests.readthedocs.io"
-__version__ = "2.28.1"
-__build__ = 0x022801
+__version__ = "2.28.2"
+__build__ = 0x022802
 __author__ = "Kenneth Reitz"
 __author_email__ = "me@kennethreitz.org"
 __license__ = "Apache 2.0"
-__copyright__ = "Copyright 2022 Kenneth Reitz"
+__copyright__ = "Copyright Kenneth Reitz"
 __cake__ = "\u2728 \U0001f370 \u2728"

View file

@@ -438,7 +438,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin):
         if not scheme:
             raise MissingSchema(
                 f"Invalid URL {url!r}: No scheme supplied. "
-                f"Perhaps you meant http://{url}?"
+                f"Perhaps you meant https://{url}?"
             )
 
         if not host:

View file

@@ -1,2 +1,2 @@
 # This file is protected via CODEOWNERS
-__version__ = "1.26.13"
+__version__ = "1.26.14"

View file

@@ -224,7 +224,7 @@ class AppEngineManager(RequestMethods):
             )
 
         # Check if we should retry the HTTP response.
-        has_retry_after = bool(http_response.getheader("Retry-After"))
+        has_retry_after = bool(http_response.headers.get("Retry-After"))
         if retries.is_retry(method, http_response.status, has_retry_after):
             retries = retries.increment(method, url, response=http_response, _pool=self)
             log.debug("Retry: %s", url)

View file

@@ -69,7 +69,7 @@ class NTLMConnectionPool(HTTPSConnectionPool):
         log.debug("Request headers: %s", headers)
         conn.request("GET", self.authurl, None, headers)
         res = conn.getresponse()
-        reshdr = dict(res.getheaders())
+        reshdr = dict(res.headers)
         log.debug("Response status: %s %s", res.status, res.reason)
         log.debug("Response headers: %s", reshdr)
         log.debug("Response data: %s [...]", res.read(100))
@@ -101,7 +101,7 @@ class NTLMConnectionPool(HTTPSConnectionPool):
         conn.request("GET", self.authurl, None, headers)
         res = conn.getresponse()
         log.debug("Response status: %s %s", res.status, res.reason)
-        log.debug("Response headers: %s", dict(res.getheaders()))
+        log.debug("Response headers: %s", dict(res.headers))
         log.debug("Response data: %s [...]", res.read()[:100])
 
         if res.status != 200:
             if res.status == 401:

View file

@@ -666,7 +666,7 @@ class HTTPResponse(io.IOBase):
     def getheaders(self):
         warnings.warn(
             "HTTPResponse.getheaders() is deprecated and will be removed "
-            "in urllib3 v2.1.0. Instead access HTTResponse.headers directly.",
+            "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.",
             category=DeprecationWarning,
             stacklevel=2,
         )
@@ -675,7 +675,7 @@
     def getheader(self, name, default=None):
         warnings.warn(
             "HTTPResponse.getheader() is deprecated and will be removed "
-            "in urllib3 v2.1.0. Instead use HTTResponse.headers.get(name, default).",
+            "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).",
             category=DeprecationWarning,
             stacklevel=2,
         )

View file

@@ -63,7 +63,7 @@ IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT + "$")
 BRACELESS_IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT[2:-2] + "$")
 ZONE_ID_RE = re.compile("(" + ZONE_ID_PAT + r")\]$")
 
-_HOST_PORT_PAT = ("^(%s|%s|%s)(?::0*([0-9]{0,5}))?$") % (
+_HOST_PORT_PAT = ("^(%s|%s|%s)(?::0*?(|0|[1-9][0-9]{0,4}))?$") % (
     REG_NAME_PAT,
     IPV4_PAT,
     IPV6_ADDRZ_PAT,

View file

@@ -36,7 +36,7 @@ pyparsing==3.0.9
 python-dateutil==2.8.2
 python-twitter==3.5
 pytz==2022.7
-requests==2.28.1
+requests==2.28.2
 requests-oauthlib==1.3.1
 rumps==0.4.0; platform_system == "Darwin"
 simplejson==3.18.0