Bump requests from 2.27.1 to 2.28.1 (#1781)

* Bump requests from 2.27.1 to 2.28.1

Bumps [requests](https://github.com/psf/requests) from 2.27.1 to 2.28.1.
- [Release notes](https://github.com/psf/requests/releases)
- [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md)
- [Commits](https://github.com/psf/requests/compare/v2.27.1...v2.28.1)

---
updated-dependencies:
- dependency-name: requests
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Update requests==2.28.1

* Update urllib3==1.26.12

* Update certifi==2022.9.24

* Update idna==3.4

* Update charset-normalizer==2.1.1

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com>

[skip ci]
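For reference, the five pins touched by this commit correspond to requests
2.28.1 plus its dependency chain, and would look like this in a
requirements.txt (a minimal fragment; the repository's actual file lists many
more packages):

    # requests and the libraries it pulls in, as pinned by this commit
    requests==2.28.1
    urllib3==1.26.12
    certifi==2022.9.24
    idna==3.4
    charset-normalizer==2.1.1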
Authored by dependabot[bot] on 2022-11-12 17:12:19 -08:00, committed by GitHub
parent baa0e08c2a
commit af1aed0b6b
Signature: no known key found for this signature in database (GPG key ID: 4AEE18F83AFDEB23)
46 changed files with 3295 additions and 2709 deletions

Diff of charset_normalizer/api.py (the remaining changed files are not shown):

@@ -1,11 +1,8 @@
 import logging
+import warnings
+from os import PathLike
 from os.path import basename, splitext
-from typing import BinaryIO, List, Optional, Set
-
-try:
-    from os import PathLike
-except ImportError:  # pragma: no cover
-    PathLike = str  # type: ignore
+from typing import Any, BinaryIO, List, Optional, Set
 
 from .cd import (
     coherence_ratio,
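A note on the import hunk above: the try/except fallback for os.PathLike is
gone because every Python version charset-normalizer 2.1 supports provides it,
and typing.Any is pulled in for the quoted "PathLike[Any]" annotations that
appear later in this diff. A minimal sketch of why the quotes matter (this
example is illustrative, not taken from the library):

    from os import PathLike
    from typing import Any

    # Fine on any supported Python: the string annotation is only evaluated
    # by type checkers, never at runtime.
    def read_anything(path: "PathLike[Any]") -> None:
        ...

    # Without the quotes, PathLike[Any] raises TypeError at import time on
    # Python < 3.9, where os.PathLike is not yet subscriptable at runtime.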
@@ -18,6 +15,7 @@ from .md import mess_ratio
 from .models import CharsetMatch, CharsetMatches
 from .utils import (
     any_specified_encoding,
+    cut_sequence_chunks,
     iana_name,
     identify_sig_or_bom,
     is_cp_similar,
@@ -39,8 +37,8 @@ def from_bytes(
     steps: int = 5,
     chunk_size: int = 512,
     threshold: float = 0.2,
-    cp_isolation: List[str] = None,
-    cp_exclusion: List[str] = None,
+    cp_isolation: Optional[List[str]] = None,
+    cp_exclusion: Optional[List[str]] = None,
     preemptive_behaviour: bool = True,
     explain: bool = False,
 ) -> CharsetMatches:
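The two cp_isolation/cp_exclusion changes above (repeated for from_fp,
from_path, and normalize below) remove "implicit Optional" from the
signatures: under PEP 484, an argument annotated List[str] should not
silently accept a None default, and recent type checkers reject it (mypy
turned no_implicit_optional on by default in 0.990). A minimal illustration
with hypothetical function names:

    from typing import List, Optional

    # Before: flagged by strict type checkers.
    def detect_old(cp_isolation: List[str] = None) -> None:  # type: ignore[assignment]
        ...

    # After: the None default is declared in the annotation.
    def detect_new(cp_isolation: Optional[List[str]] = None) -> None:
        ...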
@@ -70,11 +68,11 @@ def from_bytes(
         )
 
     if explain:
-        previous_logger_level = logger.level  # type: int
+        previous_logger_level: int = logger.level
         logger.addHandler(explain_handler)
         logger.setLevel(TRACE)
 
-    length = len(sequences)  # type: int
+    length: int = len(sequences)
 
     if length == 0:
         logger.debug("Encoding detection on empty bytes, assuming utf_8 intention.")
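This hunk begins a pattern repeated through the rest of the diff: "# type:"
comments are replaced with PEP 526 variable annotations, which exist on every
interpreter the 2.1 series supports (2.1.0 dropped Python 3.5). Both spellings
mean the same thing to a type checker; a small self-contained example:

    sequences = b"hello"

    # Old style: comment-based hint, required only on Python < 3.6.
    length = len(sequences)  # type: int

    # New style: PEP 526 annotation, checked identically by mypy.
    length_annotated: int = len(sequences)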
@@ -119,8 +117,8 @@ def from_bytes(
     if steps > 1 and length / steps < chunk_size:
         chunk_size = int(length / steps)
 
-    is_too_small_sequence = len(sequences) < TOO_SMALL_SEQUENCE  # type: bool
-    is_too_large_sequence = len(sequences) >= TOO_BIG_SEQUENCE  # type: bool
+    is_too_small_sequence: bool = len(sequences) < TOO_SMALL_SEQUENCE
+    is_too_large_sequence: bool = len(sequences) >= TOO_BIG_SEQUENCE
 
     if is_too_small_sequence:
         logger.log(
@@ -137,11 +135,11 @@ def from_bytes(
             ),
         )
 
-    prioritized_encodings = []  # type: List[str]
+    prioritized_encodings: List[str] = []
 
-    specified_encoding = (
+    specified_encoding: Optional[str] = (
         any_specified_encoding(sequences) if preemptive_behaviour else None
-    )  # type: Optional[str]
+    )
 
     if specified_encoding is not None:
         prioritized_encodings.append(specified_encoding)
@@ -151,15 +149,15 @@ def from_bytes(
             specified_encoding,
         )
 
-    tested = set()  # type: Set[str]
-    tested_but_hard_failure = []  # type: List[str]
-    tested_but_soft_failure = []  # type: List[str]
+    tested: Set[str] = set()
+    tested_but_hard_failure: List[str] = []
+    tested_but_soft_failure: List[str] = []
 
-    fallback_ascii = None  # type: Optional[CharsetMatch]
-    fallback_u8 = None  # type: Optional[CharsetMatch]
-    fallback_specified = None  # type: Optional[CharsetMatch]
+    fallback_ascii: Optional[CharsetMatch] = None
+    fallback_u8: Optional[CharsetMatch] = None
+    fallback_specified: Optional[CharsetMatch] = None
 
-    results = CharsetMatches()  # type: CharsetMatches
+    results: CharsetMatches = CharsetMatches()
 
     sig_encoding, sig_payload = identify_sig_or_bom(sequences)
@@ -190,11 +188,11 @@ def from_bytes(
         tested.add(encoding_iana)
 
-        decoded_payload = None  # type: Optional[str]
-        bom_or_sig_available = sig_encoding == encoding_iana  # type: bool
-        strip_sig_or_bom = bom_or_sig_available and should_strip_sig_or_bom(
+        decoded_payload: Optional[str] = None
+        bom_or_sig_available: bool = sig_encoding == encoding_iana
+        strip_sig_or_bom: bool = bom_or_sig_available and should_strip_sig_or_bom(
             encoding_iana
-        )  # type: bool
+        )
 
         if encoding_iana in {"utf_16", "utf_32"} and not bom_or_sig_available:
             logger.log(
@@ -205,7 +203,7 @@ def from_bytes(
             continue
 
         try:
-            is_multi_byte_decoder = is_multi_byte_encoding(encoding_iana)  # type: bool
+            is_multi_byte_decoder: bool = is_multi_byte_encoding(encoding_iana)
         except (ModuleNotFoundError, ImportError):
             logger.log(
                 TRACE,
@@ -240,7 +238,7 @@ def from_bytes(
             tested_but_hard_failure.append(encoding_iana)
             continue
 
-        similar_soft_failure_test = False  # type: bool
+        similar_soft_failure_test: bool = False
 
         for encoding_soft_failed in tested_but_soft_failure:
             if is_cp_similar(encoding_iana, encoding_soft_failed):
@@ -262,11 +260,11 @@ def from_bytes(
             int(length / steps),
         )
 
-        multi_byte_bonus = (
+        multi_byte_bonus: bool = (
             is_multi_byte_decoder
             and decoded_payload is not None
             and len(decoded_payload) < length
-        )  # type: bool
+        )
 
         if multi_byte_bonus:
             logger.log(
@@ -276,72 +274,47 @@ def from_bytes(
                 encoding_iana,
             )
 
-        max_chunk_gave_up = int(len(r_) / 4)  # type: int
+        max_chunk_gave_up: int = int(len(r_) / 4)
 
         max_chunk_gave_up = max(max_chunk_gave_up, 2)
-        early_stop_count = 0  # type: int
+        early_stop_count: int = 0
         lazy_str_hard_failure = False
 
-        md_chunks = []  # type: List[str]
+        md_chunks: List[str] = []
         md_ratios = []
 
-        for i in r_:
-            if i + chunk_size > length + 8:
-                continue
-
-            cut_sequence = sequences[i : i + chunk_size]
-
-            if bom_or_sig_available and strip_sig_or_bom is False:
-                cut_sequence = sig_payload + cut_sequence
-
-            try:
-                chunk = cut_sequence.decode(
-                    encoding_iana,
-                    errors="ignore" if is_multi_byte_decoder else "strict",
-                )  # type: str
-            except UnicodeDecodeError as e:  # Lazy str loading may have missed something there
-                logger.log(
-                    TRACE,
-                    "LazyStr Loading: After MD chunk decode, code page %s does not fit given bytes sequence at ALL. %s",
-                    encoding_iana,
-                    str(e),
-                )
-                early_stop_count = max_chunk_gave_up
-                lazy_str_hard_failure = True
-                break
-
-            # multi-byte bad cutting detector and adjustment
-            # not the cleanest way to perform that fix but clever enough for now.
-            if is_multi_byte_decoder and i > 0 and sequences[i] >= 0x80:
-                chunk_partial_size_chk = min(chunk_size, 16)  # type: int
-
-                if (
-                    decoded_payload
-                    and chunk[:chunk_partial_size_chk] not in decoded_payload
-                ):
-                    for j in range(i, i - 4, -1):
-                        cut_sequence = sequences[j : i + chunk_size]
-
-                        if bom_or_sig_available and strip_sig_or_bom is False:
-                            cut_sequence = sig_payload + cut_sequence
-
-                        chunk = cut_sequence.decode(encoding_iana, errors="ignore")
-
-                        if chunk[:chunk_partial_size_chk] in decoded_payload:
-                            break
-
-            md_chunks.append(chunk)
-            md_ratios.append(mess_ratio(chunk, threshold))
-
-            if md_ratios[-1] >= threshold:
-                early_stop_count += 1
-
-            if (early_stop_count >= max_chunk_gave_up) or (
-                bom_or_sig_available and strip_sig_or_bom is False
-            ):
-                break
+        try:
+            for chunk in cut_sequence_chunks(
+                sequences,
+                encoding_iana,
+                r_,
+                chunk_size,
+                bom_or_sig_available,
+                strip_sig_or_bom,
+                sig_payload,
+                is_multi_byte_decoder,
+                decoded_payload,
+            ):
+                md_chunks.append(chunk)
+                md_ratios.append(mess_ratio(chunk, threshold))
+
+                if md_ratios[-1] >= threshold:
+                    early_stop_count += 1
+
+                if (early_stop_count >= max_chunk_gave_up) or (
+                    bom_or_sig_available and strip_sig_or_bom is False
+                ):
+                    break
+        except UnicodeDecodeError as e:  # Lazy str loading may have missed something there
+            logger.log(
+                TRACE,
+                "LazyStr Loading: After MD chunk decode, code page %s does not fit given bytes sequence at ALL. %s",
+                encoding_iana,
+                str(e),
+            )
+            early_stop_count = max_chunk_gave_up
+            lazy_str_hard_failure = True
 
         # We might want to check the sequence again with the whole content
         # Only if initial MD tests passes
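The hunk above is the one real refactor in this file: the inline
chunk-cutting loop moves into a cut_sequence_chunks() generator in
charset_normalizer's utils module (added to the imports earlier in this
diff), so a single try/except around the iteration replaces the per-chunk
error handling. A simplified sketch of the generator's shape, inferred from
the removed inline code (the real helper also keeps the multi-byte bad-cut
readjustment that the deleted lines performed):

    from typing import Generator, Optional

    def cut_sequence_chunks(
        sequences: bytes,
        encoding_iana: str,
        offsets: range,
        chunk_size: int,
        bom_or_sig_available: bool,
        strip_sig_or_bom: bool,
        sig_payload: bytes,
        is_multi_byte_decoder: bool,
        decoded_payload: Optional[str] = None,  # used by the omitted bad-cut fix
    ) -> Generator[str, None, None]:
        for i in offsets:
            chunk_end = i + chunk_size
            if chunk_end > len(sequences) + 8:
                continue

            cut_sequence = sequences[i:chunk_end]

            # Re-attach the BOM/signature when one was detected but not
            # stripped, exactly as the removed inline code did.
            if bom_or_sig_available and strip_sig_or_bom is False:
                cut_sequence = sig_payload + cut_sequence

            # Multi-byte decoders tolerate a bad cut ("ignore"); single-byte
            # decoders fail loudly, and the caller now catches the
            # UnicodeDecodeError around the whole loop instead of per chunk.
            yield cut_sequence.decode(
                encoding_iana,
                errors="ignore" if is_multi_byte_decoder else "strict",
            )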
@@ -362,9 +335,7 @@ def from_bytes(
             tested_but_hard_failure.append(encoding_iana)
             continue
 
-        mean_mess_ratio = (
-            sum(md_ratios) / len(md_ratios) if md_ratios else 0.0
-        )  # type: float
+        mean_mess_ratio: float = sum(md_ratios) / len(md_ratios) if md_ratios else 0.0
         if mean_mess_ratio >= threshold or early_stop_count >= max_chunk_gave_up:
             tested_but_soft_failure.append(encoding_iana)
             logger.log(
@@ -399,7 +370,7 @@ def from_bytes(
             )
 
         if not is_multi_byte_decoder:
-            target_languages = encoding_languages(encoding_iana)  # type: List[str]
+            target_languages: List[str] = encoding_languages(encoding_iana)
         else:
             target_languages = mb_encoding_languages(encoding_iana)
@@ -516,8 +487,8 @@ def from_fp(
     steps: int = 5,
     chunk_size: int = 512,
     threshold: float = 0.20,
-    cp_isolation: List[str] = None,
-    cp_exclusion: List[str] = None,
+    cp_isolation: Optional[List[str]] = None,
+    cp_exclusion: Optional[List[str]] = None,
     preemptive_behaviour: bool = True,
     explain: bool = False,
 ) -> CharsetMatches:
@@ -538,12 +509,12 @@ def from_fp(
 
 def from_path(
-    path: PathLike,
+    path: "PathLike[Any]",
     steps: int = 5,
     chunk_size: int = 512,
     threshold: float = 0.20,
-    cp_isolation: List[str] = None,
-    cp_exclusion: List[str] = None,
+    cp_isolation: Optional[List[str]] = None,
+    cp_exclusion: Optional[List[str]] = None,
     preemptive_behaviour: bool = True,
     explain: bool = False,
 ) -> CharsetMatches:
@@ -565,17 +536,22 @@ def from_path(
 
 def normalize(
-    path: PathLike,
+    path: "PathLike[Any]",
     steps: int = 5,
     chunk_size: int = 512,
     threshold: float = 0.20,
-    cp_isolation: List[str] = None,
-    cp_exclusion: List[str] = None,
+    cp_isolation: Optional[List[str]] = None,
+    cp_exclusion: Optional[List[str]] = None,
     preemptive_behaviour: bool = True,
 ) -> CharsetMatch:
     """
     Take a (text-based) file path and try to create another file next to it, this time using UTF-8.
     """
+    warnings.warn(
+        "normalize is deprecated and will be removed in 3.0",
+        DeprecationWarning,
+    )
+
     results = from_path(
         path,
         steps,
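Since normalize() now emits a DeprecationWarning and is slated for removal in
3.0, callers can get the same effect from the public from_path() API. A
minimal sketch (from_path and CharsetMatches.best() are real public API; the
file names here are only illustrative):

    from charset_normalizer import from_path

    results = from_path("./subtitle.srt")
    best_guess = results.best()

    if best_guess is not None:
        # Write a UTF-8 re-encoding of the detected payload next to the
        # original file, which is roughly what normalize() did.
        with open("./subtitle.utf8.srt", "wb") as fp:
            fp.write(str(best_guess).encode("utf_8"))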