Bump requests from 2.27.1 to 2.28.1 (#1781)

* Bump requests from 2.27.1 to 2.28.1 Bumps [requests](https://github.com/psf/requests) from 2.27.1 to 2.28.1. - [Release notes](https://github.com/psf/requests/releases) - [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md) - [Commits](https://github.com/psf/requests/compare/v2.27.1...v2.28.1) --- updated-dependencies: - dependency-name: requests dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com> * Update requests==2.28.1 * Update urllib3==1.26.12 * Update certifi==2022.9.24 * Update idna==3.4 * Update charset-normalizer==2.1.1 Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com> [skip ci]
2025-07-10 15:32:38 -07:00 · 2022-11-12 17:12:19 -08:00 · 2022-11-12 17:12:19 -08:00 · af1aed0b6b
commit af1aed0b6b
parent baa0e08c2a
46 changed files with 3295 additions and 2709 deletions
--- a/lib/charset_normalizer/cd.py
+++ b/lib/charset_normalizer/cd.py
@@ -1,8 +1,8 @@
 import importlib
 from codecs import IncrementalDecoder
-from collections import Counter, OrderedDict
+from collections import Counter
 from functools import lru_cache
-from typing import Dict, List, Optional, Tuple
+from typing import Counter as TypeCounter, Dict, List, Optional, Tuple

 from .assets import FREQUENCIES
 from .constant import KO_NAMES, LANGUAGE_SUPPORTED_COUNT, TOO_SMALL_SEQUENCE, ZH_NAMES
@@ -24,17 +24,19 @@ def encoding_unicode_range(iana_name: str) -> List[str]:
    if is_multi_byte_encoding(iana_name):
        raise IOError("Function not supported on multi-byte code page")

-    decoder = importlib.import_module("encodings.{}".format(iana_name)).IncrementalDecoder  # type: ignore
+    decoder = importlib.import_module(
+        "encodings.{}".format(iana_name)
+    ).IncrementalDecoder

-    p = decoder(errors="ignore")  # type: IncrementalDecoder
-    seen_ranges = {}  # type: Dict[str, int]
-    character_count = 0  # type: int
+    p: IncrementalDecoder = decoder(errors="ignore")
+    seen_ranges: Dict[str, int] = {}
+    character_count: int = 0

    for i in range(0x40, 0xFF):
-        chunk = p.decode(bytes([i]))  # type: str
+        chunk: str = p.decode(bytes([i]))

        if chunk:
-            character_range = unicode_range(chunk)  # type: Optional[str]
+            character_range: Optional[str] = unicode_range(chunk)

            if character_range is None:
                continue
@@ -58,7 +60,7 @@ def unicode_range_languages(primary_range: str) -> List[str]:
    """
    Return inferred languages used with a unicode range.
    """
-    languages = []  # type: List[str]
+    languages: List[str] = []

    for language, characters in FREQUENCIES.items():
        for character in characters:
@@ -75,8 +77,8 @@ def encoding_languages(iana_name: str) -> List[str]:
    Single-byte encoding language association. Some code page are heavily linked to particular language(s).
    This function does the correspondence.
    """
-    unicode_ranges = encoding_unicode_range(iana_name)  # type: List[str]
-    primary_range = None  # type: Optional[str]
+    unicode_ranges: List[str] = encoding_unicode_range(iana_name)
+    primary_range: Optional[str] = None

    for specified_range in unicode_ranges:
        if "Latin" not in specified_range:
@@ -115,8 +117,8 @@ def get_target_features(language: str) -> Tuple[bool, bool]:
    """
    Determine main aspects from a supported language if it contains accents and if is pure Latin.
    """
-    target_have_accents = False  # type: bool
-    target_pure_latin = True  # type: bool
+    target_have_accents: bool = False
+    target_pure_latin: bool = True

    for character in FREQUENCIES[language]:
        if not target_have_accents and is_accentuated(character):
@@ -133,7 +135,7 @@ def alphabet_languages(
    """
    Return associated languages associated to given characters.
    """
-    languages = []  # type: List[Tuple[str, float]]
+    languages: List[Tuple[str, float]] = []

    source_have_accents = any(is_accentuated(character) for character in characters)

@@ -147,13 +149,13 @@ def alphabet_languages(
        if target_have_accents is False and source_have_accents:
            continue

-        character_count = len(language_characters)  # type: int
+        character_count: int = len(language_characters)

-        character_match_count = len(
+        character_match_count: int = len(
            [c for c in language_characters if c in characters]
-        )  # type: int
+        )

-        ratio = character_match_count / character_count  # type: float
+        ratio: float = character_match_count / character_count

        if ratio >= 0.2:
            languages.append((language, ratio))
@@ -174,36 +176,33 @@ def characters_popularity_compare(
    if language not in FREQUENCIES:
        raise ValueError("{} not available".format(language))

-    character_approved_count = 0  # type: int
+    character_approved_count: int = 0
+    FREQUENCIES_language_set = set(FREQUENCIES[language])

    for character in ordered_characters:
-        if character not in FREQUENCIES[language]:
+        if character not in FREQUENCIES_language_set:
            continue

-        characters_before_source = FREQUENCIES[language][
+        characters_before_source: List[str] = FREQUENCIES[language][
            0 : FREQUENCIES[language].index(character)
-        ]  # type: List[str]
-        characters_after_source = FREQUENCIES[language][
+        ]
+        characters_after_source: List[str] = FREQUENCIES[language][
            FREQUENCIES[language].index(character) :
-        ]  # type: List[str]
-
-        characters_before = ordered_characters[
+        ]
+        characters_before: List[str] = ordered_characters[
            0 : ordered_characters.index(character)
-        ]  # type: List[str]
-        characters_after = ordered_characters[
+        ]
+        characters_after: List[str] = ordered_characters[
            ordered_characters.index(character) :
-        ]  # type: List[str]
+        ]

-        before_match_count = [
-            e in characters_before for e in characters_before_source
-        ].count(
-            True
-        )  # type: int
-        after_match_count = [
-            e in characters_after for e in characters_after_source
-        ].count(
-            True
-        )  # type: int
+        before_match_count: int = len(
+            set(characters_before) & set(characters_before_source)
+        )
+
+        after_match_count: int = len(
+            set(characters_after) & set(characters_after_source)
+        )

        if len(characters_before_source) == 0 and before_match_count <= 4:
            character_approved_count += 1
@@ -229,18 +228,18 @@ def alpha_unicode_split(decoded_sequence: str) -> List[str]:
    Ex. a text containing English/Latin with a bit a Hebrew will return two items in the resulting list;
    One containing the latin letters and the other hebrew.
    """
-    layers = OrderedDict()  # type: Dict[str, str]
+    layers: Dict[str, str] = {}

    for character in decoded_sequence:
        if character.isalpha() is False:
            continue

-        character_range = unicode_range(character)  # type: Optional[str]
+        character_range: Optional[str] = unicode_range(character)

        if character_range is None:
            continue

-        layer_target_range = None  # type: Optional[str]
+        layer_target_range: Optional[str] = None

        for discovered_range in layers:
            if (
@@ -267,7 +266,7 @@ def merge_coherence_ratios(results: List[CoherenceMatches]) -> CoherenceMatches:
    This function merge results previously given by the function coherence_ratio.
    The return type is the same as coherence_ratio.
    """
-    per_language_ratios = OrderedDict()  # type: Dict[str, List[float]]
+    per_language_ratios: Dict[str, List[float]] = {}
    for result in results:
        for sub_result in result:
            language, ratio = sub_result
@@ -299,10 +298,10 @@ def coherence_ratio(
    A layer = Character extraction by alphabets/ranges.
    """

-    results = []  # type: List[Tuple[str, float]]
-    ignore_non_latin = False  # type: bool
+    results: List[Tuple[str, float]] = []
+    ignore_non_latin: bool = False

-    sufficient_match_count = 0  # type: int
+    sufficient_match_count: int = 0

    lg_inclusion_list = lg_inclusion.split(",") if lg_inclusion is not None else []
    if "Latin Based" in lg_inclusion_list:
@@ -310,22 +309,22 @@ def coherence_ratio(
        lg_inclusion_list.remove("Latin Based")

    for layer in alpha_unicode_split(decoded_sequence):
-        sequence_frequencies = Counter(layer)  # type: Counter
+        sequence_frequencies: TypeCounter[str] = Counter(layer)
        most_common = sequence_frequencies.most_common()

-        character_count = sum(o for c, o in most_common)  # type: int
+        character_count: int = sum(o for c, o in most_common)

        if character_count <= TOO_SMALL_SEQUENCE:
            continue

-        popular_character_ordered = [c for c, o in most_common]  # type: List[str]
+        popular_character_ordered: List[str] = [c for c, o in most_common]

        for language in lg_inclusion_list or alphabet_languages(
            popular_character_ordered, ignore_non_latin
        ):
-            ratio = characters_popularity_compare(
+            ratio: float = characters_popularity_compare(
                language, popular_character_ordered
-            )  # type: float
+            )

            if ratio < threshold:
                continue