Mirror of https://github.com/Tautulli/Tautulli.git, synced 2025-07-07 05:31:15 -07:00
Bump requests from 2.28.1 to 2.28.2 (#1968)
* Bump requests from 2.28.1 to 2.28.2

Bumps [requests](https://github.com/psf/requests) from 2.28.1 to 2.28.2.
- [Release notes](https://github.com/psf/requests/releases)
- [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md)
- [Commits](https://github.com/psf/requests/compare/v2.28.1...v2.28.2)

---
updated-dependencies:
- dependency-name: requests
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Update requests==2.28.2

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com>

[skip ci]
This commit is contained in: parent 70e09582da, commit cc78f17be5.
20 changed files with 527 additions and 302 deletions.
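The diff below covers the vendored dependency bumps: requests 2.28.1 → 2.28.2, the bundled charset_normalizer 2.1.1 → 3.0.1, and urllib3 1.26.13 → 1.26.14. As a quick sanity check after pulling this commit (a minimal sketch, not part of the change; it assumes the bundled packages resolve as ordinary top-level imports):

```python
# Hedged sanity check: confirm the versions this commit ships.
import requests
import urllib3
import charset_normalizer

print(requests.__version__)            # expected: 2.28.2
print(urllib3.__version__)             # expected: 1.26.14
print(charset_normalizer.__version__)  # expected: 3.0.1
```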
@@ -21,14 +21,8 @@ at <https://github.com/Ousret/charset_normalizer>.
 """
 import logging
 
-from .api import from_bytes, from_fp, from_path, normalize
-from .legacy import (
-    CharsetDetector,
-    CharsetDoctor,
-    CharsetNormalizerMatch,
-    CharsetNormalizerMatches,
-    detect,
-)
+from .api import from_bytes, from_fp, from_path
+from .legacy import detect
 from .models import CharsetMatch, CharsetMatches
 from .utils import set_logging_handler
 from .version import VERSION, __version__

@@ -37,14 +31,9 @@ __all__ = (
     "from_fp",
     "from_path",
     "from_bytes",
-    "normalize",
     "detect",
     "CharsetMatch",
     "CharsetMatches",
-    "CharsetNormalizerMatch",
-    "CharsetNormalizerMatches",
-    "CharsetDetector",
-    "CharsetDoctor",
     "__version__",
     "VERSION",
     "set_logging_handler",
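The removed imports and __all__ entries (normalize, CharsetNormalizerMatch, CharsetNormalizerMatches, CharsetDetector, CharsetDoctor) were deprecated 2.x aliases; what remains exported is from_bytes/from_fp/from_path plus the chardet-style detect. A minimal sketch of the surviving API (the sample payload is made up):

```python
# Minimal sketch of the API that charset_normalizer 3.x still exports.
from charset_normalizer import from_bytes

payload = "Всеки човек има право на образование.".encode("cp1251")  # made-up sample
best_guess = from_bytes(payload).best()
if best_guess is not None:
    print(best_guess.encoding)  # e.g. "cp1251"
    print(str(best_guess))      # the decoded text
```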
@@ -1,7 +1,5 @@
 import logging
-import warnings
 from os import PathLike
-from os.path import basename, splitext
 from typing import Any, BinaryIO, List, Optional, Set
 
 from .cd import (

@@ -41,11 +39,12 @@ def from_bytes(
     cp_exclusion: Optional[List[str]] = None,
     preemptive_behaviour: bool = True,
     explain: bool = False,
+    language_threshold: float = 0.1,
 ) -> CharsetMatches:
     """
     Given a raw bytes sequence, return the best possibles charset usable to render str objects.
     If there is no results, it is a strong indicator that the source is binary/not text.
-    By default, the process will extract 5 blocs of 512o each to assess the mess and coherence of a given sequence.
+    By default, the process will extract 5 blocks of 512o each to assess the mess and coherence of a given sequence.
     And will give up a particular code page after 20% of measured mess. Those criteria are customizable at will.
 
     The preemptive behavior DOES NOT replace the traditional detection workflow, it prioritize a particular code page
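The only signature change here is the new language_threshold keyword (default 0.1), which is threaded through to the coherence check further down. A hedged example of passing it explicitly (the input file name is hypothetical):

```python
# Hedged example of the language_threshold argument added in charset_normalizer 3.x.
from charset_normalizer import from_bytes

with open("some_file.txt", "rb") as fh:  # hypothetical input
    data = fh.read()

results = from_bytes(
    data,
    steps=5,
    chunk_size=512,
    threshold=0.20,
    language_threshold=0.1,  # minimum coherence ratio a language must reach to be kept
)
print(results.best())
```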
@@ -197,7 +196,14 @@ def from_bytes(
         if encoding_iana in {"utf_16", "utf_32"} and not bom_or_sig_available:
             logger.log(
                 TRACE,
-                "Encoding %s wont be tested as-is because it require a BOM. Will try some sub-encoder LE/BE.",
+                "Encoding %s won't be tested as-is because it require a BOM. Will try some sub-encoder LE/BE.",
+                encoding_iana,
+            )
+            continue
+        if encoding_iana in {"utf_7"} and not bom_or_sig_available:
+            logger.log(
+                TRACE,
+                "Encoding %s won't be tested as-is because detection is unreliable without BOM/SIG.",
                 encoding_iana,
             )
             continue
@@ -297,7 +303,13 @@ def from_bytes(
             ):
                 md_chunks.append(chunk)
 
-                md_ratios.append(mess_ratio(chunk, threshold))
+                md_ratios.append(
+                    mess_ratio(
+                        chunk,
+                        threshold,
+                        explain is True and 1 <= len(cp_isolation) <= 2,
+                    )
+                )
 
                 if md_ratios[-1] >= threshold:
                     early_stop_count += 1
@@ -389,7 +401,9 @@ def from_bytes(
             if encoding_iana != "ascii":
                 for chunk in md_chunks:
                     chunk_languages = coherence_ratio(
-                        chunk, 0.1, ",".join(target_languages) if target_languages else None
+                        chunk,
+                        language_threshold,
+                        ",".join(target_languages) if target_languages else None,
                     )
 
                     cd_ratios.append(chunk_languages)
@ -491,6 +505,7 @@ def from_fp(
|
||||||
cp_exclusion: Optional[List[str]] = None,
|
cp_exclusion: Optional[List[str]] = None,
|
||||||
preemptive_behaviour: bool = True,
|
preemptive_behaviour: bool = True,
|
||||||
explain: bool = False,
|
explain: bool = False,
|
||||||
|
language_threshold: float = 0.1,
|
||||||
) -> CharsetMatches:
|
) -> CharsetMatches:
|
||||||
"""
|
"""
|
||||||
Same thing than the function from_bytes but using a file pointer that is already ready.
|
Same thing than the function from_bytes but using a file pointer that is already ready.
|
||||||
|
@ -505,6 +520,7 @@ def from_fp(
|
||||||
cp_exclusion,
|
cp_exclusion,
|
||||||
preemptive_behaviour,
|
preemptive_behaviour,
|
||||||
explain,
|
explain,
|
||||||
|
language_threshold,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -517,6 +533,7 @@ def from_path(
     cp_exclusion: Optional[List[str]] = None,
     preemptive_behaviour: bool = True,
     explain: bool = False,
+    language_threshold: float = 0.1,
 ) -> CharsetMatches:
     """
     Same thing than the function from_bytes but with one extra step. Opening and reading given file path in binary mode.

@@ -532,53 +549,5 @@ def from_path(
         cp_exclusion,
         preemptive_behaviour,
         explain,
+        language_threshold,
     )
-
-
-def normalize(
-    path: "PathLike[Any]",
-    steps: int = 5,
-    chunk_size: int = 512,
-    threshold: float = 0.20,
-    cp_isolation: Optional[List[str]] = None,
-    cp_exclusion: Optional[List[str]] = None,
-    preemptive_behaviour: bool = True,
-) -> CharsetMatch:
-    """
-    Take a (text-based) file path and try to create another file next to it, this time using UTF-8.
-    """
-    warnings.warn(
-        "normalize is deprecated and will be removed in 3.0",
-        DeprecationWarning,
-    )
-
-    results = from_path(
-        path,
-        steps,
-        chunk_size,
-        threshold,
-        cp_isolation,
-        cp_exclusion,
-        preemptive_behaviour,
-    )
-
-    filename = basename(path)
-    target_extensions = list(splitext(filename))
-
-    if len(results) == 0:
-        raise IOError(
-            'Unable to normalize "{}", no encoding charset seems to fit.'.format(
-                filename
-            )
-        )
-
-    result = results.best()
-
-    target_extensions[0] += "-" + result.encoding  # type: ignore
-
-    with open(
-        "{}".format(str(path).replace(filename, "".join(target_extensions))), "wb"
-    ) as fp:
-        fp.write(result.output())  # type: ignore
-
-    return result  # type: ignore
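The deprecated normalize() helper is deleted outright. Callers that depended on it can approximate the old behaviour with from_path() plus a manual write; a rough stand-in sketch (the output naming is an assumption, not a guaranteed match for the removed code):

```python
# Rough, hedged stand-in for the removed normalize() helper.
from os.path import splitext
from charset_normalizer import from_path

def normalize_to_utf8(path: str) -> str:
    best_guess = from_path(path).best()
    if best_guess is None:
        raise IOError(f'Unable to normalize "{path}", no encoding charset seems to fit.')

    stem, ext = splitext(path)
    target = f"{stem}-{best_guess.encoding}{ext}"  # e.g. notes-cp1252.txt (assumed naming)
    with open(target, "wb") as fp:
        fp.write(best_guess.output())  # output() re-encodes as UTF-8 by default
    return target
```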
@@ -1,6 +1,8 @@
 # -*- coding: utf-8 -*-
 from typing import Dict, List
 
+# Language label that contain the em dash "—"
+# character are to be considered alternative seq to origin
 FREQUENCIES: Dict[str, List[str]] = {
     "English": [
         "e",

@@ -30,6 +32,34 @@ FREQUENCIES: Dict[str, List[str]] = {
         "z",
         "q",
     ],
+    "English—": [
+        "e", "a", "t", "i", "o", "n", "s", "r", "h", "l", "d", "c", "m",
+        "u", "f", "p", "g", "w", "b", "y", "v", "k", "j", "x", "z", "q",
+    ],
     "German": [
         "e",
         "n",
@@ -226,33 +256,303 @@ FREQUENCIES: Dict[str, List[str]] = {
         "ж",
         "ц",
     ],
+    # Jap-Kanji
     "Japanese": [
+        "人", "一", "大", "亅", "丁", "丨", "竹", "笑", "口", "日", "今", "二", "彳",
+        "行", "十", "土", "丶", "寸", "寺", "時", "乙", "丿", "乂", "气", "気", "冂",
+        "巾", "亠", "市", "目", "儿", "見", "八", "小", "凵", "県", "月", "彐", "門",
+        "間", "木", "東", "山", "出", "本", "中", "刀", "分", "耳", "又", "取", "最",
+        "言", "田", "心", "思", "刂", "前", "京", "尹", "事", "生", "厶", "云", "会",
+        "未", "来", "白", "冫", "楽", "灬", "馬", "尸", "尺", "駅", "明", "耂", "者",
+        "了", "阝", "都", "高", "卜", "占", "厂", "广", "店", "子", "申", "奄", "亻",
+        "俺", "上", "方", "冖", "学", "衣", "艮", "食", "自",
+    ],
+    # Jap-Katakana
+    "Japanese—": [
+        "ー", "ン", "ス", "・", "ル", "ト", "リ", "イ", "ア", "ラ", "ッ", "ク", "ド",
+        "シ", "レ", "ジ", "タ", "フ", "ロ", "カ", "テ", "マ", "ィ", "グ", "バ", "ム",
+        "プ", "オ", "コ", "デ", "ニ", "ウ", "メ", "サ", "ビ", "ナ", "ブ", "ャ", "エ",
+        "ュ", "チ", "キ", "ズ", "ダ", "パ", "ミ", "ェ", "ョ", "ハ", "セ", "ベ", "ガ",
+        "モ", "ツ", "ネ", "ボ", "ソ", "ノ", "ァ", "ヴ", "ワ", "ポ", "ペ", "ピ", "ケ",
+        "ゴ", "ギ", "ザ", "ホ", "ゲ", "ォ", "ヤ", "ヒ", "ユ", "ヨ", "ヘ", "ゼ", "ヌ",
+        "ゥ", "ゾ", "ヶ", "ヂ", "ヲ", "ヅ", "ヵ", "ヱ", "ヰ", "ヮ", "ヽ", "゠", "ヾ",
+        "ヷ", "ヿ", "ヸ", "ヹ", "ヺ",
+    ],
+    # Jap-Hiragana
+    "Japanese——": [
         "の",
         "に",
         "る",
         "た",
-        "は",
-        "ー",
         "と",
+        "は",
         "し",
+        "い",
         "を",
         "で",
         "て",
         "が",
-        "い",
-        "ン",
-        "れ",
         "な",
-        "年",
-        "ス",
-        "っ",
-        "ル",
+        "れ",
         "か",
         "ら",
-        "あ",
         "さ",
-        "も",
+        "っ",
         "り",
+        "す", "あ", "も", "こ", "ま", "う", "く", "よ", "き", "ん", "め", "お", "け",
+        "そ", "つ", "だ", "や", "え", "ど", "わ", "ち", "み", "せ", "じ", "ば", "へ",
+        "び", "ず", "ろ", "ほ", "げ", "む", "べ", "ひ", "ょ", "ゆ", "ぶ", "ご", "ゃ",
+        "ね", "ふ", "ぐ", "ぎ", "ぼ", "ゅ", "づ", "ざ", "ぞ", "ぬ", "ぜ", "ぱ", "ぽ",
+        "ぷ", "ぴ", "ぃ", "ぁ", "ぇ", "ぺ", "ゞ", "ぢ", "ぉ", "ぅ", "ゐ", "ゝ", "ゑ",
+        "゛", "゜", "ゎ", "ゔ", "゚", "ゟ", "゙", "ゕ", "ゖ",
     ],
     "Portuguese": [
         "a",
@@ -340,6 +640,77 @@ FREQUENCIES: Dict[str, List[str]] = {
         "就",
         "出",
         "会",
+        "可", "也", "你", "对", "生", "能", "而", "子", "那", "得", "于", "着", "下",
+        "自", "之", "年", "过", "发", "后", "作", "里", "用", "道", "行", "所", "然",
+        "家", "种", "事", "成", "方", "多", "经", "么", "去", "法", "学", "如", "都",
+        "同", "现", "当", "没", "动", "面", "起", "看", "定", "天", "分", "还", "进",
+        "好", "小", "部", "其", "些", "主", "样", "理", "心", "她", "本", "前", "开",
+        "但", "因", "只", "从", "想", "实",
     ],
     "Ukrainian": [
         "о",
@@ -956,34 +1327,6 @@ FREQUENCIES: Dict[str, List[str]] = {
         "ö",
         "y",
     ],
-    "Simple English": [
-        "e", "a", "t", "i", "o", "n", "s", "r", "h", "l", "d", "c", "m",
-        "u", "f", "p", "g", "w", "b", "y", "v", "k", "j", "x", "z", "q",
-    ],
     "Thai": [
         "า",
         "น",

@@ -1066,31 +1409,6 @@ FREQUENCIES: Dict[str, List[str]] = {
         "ஒ",
         "ஸ",
     ],
-    "Classical Chinese": [
-        "之", "年", "為", "也", "以", "一", "人", "其", "者", "國", "有", "二",
-        "十", "於", "曰", "三", "不", "大", "而", "子", "中", "五", "四",
-    ],
     "Kazakh": [
         "а",
         "ы",
@@ -105,7 +105,7 @@ def mb_encoding_languages(iana_name: str) -> List[str]:
     ):
         return ["Japanese"]
     if iana_name.startswith("gb") or iana_name in ZH_NAMES:
-        return ["Chinese", "Classical Chinese"]
+        return ["Chinese"]
     if iana_name.startswith("iso2022_kr") or iana_name in KO_NAMES:
         return ["Korean"]
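With the "Classical Chinese" frequency table gone, GB-family code pages now map to a single language. A hedged check against the internal helper (it is not part of the public API and may change):

```python
# Hedged illustration of the narrowed language mapping.
from charset_normalizer.cd import mb_encoding_languages

print(mb_encoding_languages("gb2312"))  # ["Chinese"] instead of ["Chinese", "Classical Chinese"]
print(mb_encoding_languages("cp932"))   # still ["Japanese"]
```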
@@ -179,22 +179,45 @@ def characters_popularity_compare(
     character_approved_count: int = 0
     FREQUENCIES_language_set = set(FREQUENCIES[language])
 
-    for character in ordered_characters:
+    ordered_characters_count: int = len(ordered_characters)
+    target_language_characters_count: int = len(FREQUENCIES[language])
+
+    large_alphabet: bool = target_language_characters_count > 26
+
+    for character, character_rank in zip(
+        ordered_characters, range(0, ordered_characters_count)
+    ):
         if character not in FREQUENCIES_language_set:
             continue
 
+        character_rank_in_language: int = FREQUENCIES[language].index(character)
+        expected_projection_ratio: float = (
+            target_language_characters_count / ordered_characters_count
+        )
+        character_rank_projection: int = int(character_rank * expected_projection_ratio)
+
+        if (
+            large_alphabet is False
+            and abs(character_rank_projection - character_rank_in_language) > 4
+        ):
+            continue
+
+        if (
+            large_alphabet is True
+            and abs(character_rank_projection - character_rank_in_language)
+            < target_language_characters_count / 3
+        ):
+            character_approved_count += 1
+            continue
+
         characters_before_source: List[str] = FREQUENCIES[language][
-            0 : FREQUENCIES[language].index(character)
+            0:character_rank_in_language
         ]
         characters_after_source: List[str] = FREQUENCIES[language][
-            FREQUENCIES[language].index(character) :
+            character_rank_in_language:
         ]
-        characters_before: List[str] = ordered_characters[
-            0 : ordered_characters.index(character)
-        ]
-        characters_after: List[str] = ordered_characters[
-            ordered_characters.index(character) :
-        ]
+        characters_before: List[str] = ordered_characters[0:character_rank]
+        characters_after: List[str] = ordered_characters[character_rank:]
 
         before_match_count: int = len(
             set(characters_before) & set(characters_before_source)
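The rewritten loop swaps the plain membership walk for a rank-projection gate: each observed character's rank is projected onto the language table, and for small alphabets only characters whose projected rank lands close to their table rank are scored in detail (large alphabets are approved outright when the projection is near enough). A toy sketch of the projection arithmetic, with made-up values:

```python
# Toy sketch of the rank-projection gate (all values are made up).
ordered_characters = ["e", "t", "a", "o", "n"]        # ranks observed in the analysed text
language_table = ["e", "a", "t", "i", "o", "n", "s"]  # FREQUENCIES-style table for the language

expected_projection_ratio = len(language_table) / len(ordered_characters)  # 7 / 5 = 1.4

for character_rank, character in enumerate(ordered_characters):
    if character not in language_table:
        continue
    rank_in_language = language_table.index(character)
    rank_projection = int(character_rank * expected_projection_ratio)
    # Small alphabet (26 symbols or fewer): skip characters whose projection is far off.
    if abs(rank_projection - rank_in_language) > 4:
        continue
    print(character, character_rank, rank_projection, rank_in_language)
```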
@@ -289,6 +312,33 @@ def merge_coherence_ratios(results: List[CoherenceMatches]) -> CoherenceMatches:
     return sorted(merge, key=lambda x: x[1], reverse=True)
 
 
+def filter_alt_coherence_matches(results: CoherenceMatches) -> CoherenceMatches:
+    """
+    We shall NOT return "English—" in CoherenceMatches because it is an alternative
+    of "English". This function only keeps the best match and remove the em-dash in it.
+    """
+    index_results: Dict[str, List[float]] = dict()
+
+    for result in results:
+        language, ratio = result
+        no_em_name: str = language.replace("—", "")
+
+        if no_em_name not in index_results:
+            index_results[no_em_name] = []
+
+        index_results[no_em_name].append(ratio)
+
+    if any(len(index_results[e]) > 1 for e in index_results):
+        filtered_results: CoherenceMatches = []
+
+        for language in index_results:
+            filtered_results.append((language, max(index_results[language])))
+
+        return filtered_results
+
+    return results
+
+
 @lru_cache(maxsize=2048)
 def coherence_ratio(
     decoded_sequence: str, threshold: float = 0.1, lg_inclusion: Optional[str] = None

@@ -336,4 +386,6 @@ def coherence_ratio(
         if sufficient_match_count >= 3:
             break
 
-    return sorted(results, key=lambda x: x[1], reverse=True)
+    return sorted(
+        filter_alt_coherence_matches(results), key=lambda x: x[1], reverse=True
+    )
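The new filter_alt_coherence_matches() folds the em-dash "alternative" tables (for example "English—") back into their base language before coherence results are returned, keeping only the best ratio. A hedged illustration (the ratios are invented):

```python
# Hedged illustration of folding alternative language labels into the base name.
from charset_normalizer.cd import filter_alt_coherence_matches

matches = [("English", 0.42), ("English—", 0.55), ("German", 0.10)]
print(filter_alt_coherence_matches(matches))
# roughly: [("English", 0.55), ("German", 0.10)]; the best ratio wins and the suffix is dropped
```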
@@ -1,15 +1,12 @@
 import argparse
 import sys
 from json import dumps
-from os.path import abspath
+from os.path import abspath, basename, dirname, join, realpath
 from platform import python_version
 from typing import List, Optional
+from unicodedata import unidata_version
 
-try:
-    from unicodedata2 import unidata_version
-except ImportError:
-    from unicodedata import unidata_version
-
+import charset_normalizer.md as md_module
 from charset_normalizer import from_fp
 from charset_normalizer.models import CliDetectionResult
 from charset_normalizer.version import __version__

@@ -124,8 +121,11 @@ def cli_detect(argv: Optional[List[str]] = None) -> int:
     parser.add_argument(
         "--version",
         action="version",
-        version="Charset-Normalizer {} - Python {} - Unicode {}".format(
-            __version__, python_version(), unidata_version
+        version="Charset-Normalizer {} - Python {} - Unicode {} - SpeedUp {}".format(
+            __version__,
+            python_version(),
+            unidata_version,
+            "OFF" if md_module.__file__.lower().endswith(".py") else "ON",
         ),
         help="Show version information and exit.",
     )
@@ -234,7 +234,10 @@ def cli_detect(argv: Optional[List[str]] = None) -> int:
                 my_file.close()
                 continue
 
-            o_: List[str] = my_file.name.split(".")
+            dir_path = dirname(realpath(my_file.name))
+            file_name = basename(realpath(my_file.name))
+
+            o_: List[str] = file_name.split(".")
 
             if args.replace is False:
                 o_.insert(-1, best_guess.encoding)

@@ -255,7 +258,7 @@ def cli_detect(argv: Optional[List[str]] = None) -> int:
                     continue
 
                 try:
-                    x_[0].unicode_path = abspath("./{}".format(".".join(o_)))
+                    x_[0].unicode_path = join(dir_path, ".".join(o_))
 
                     with open(x_[0].unicode_path, "w", encoding="utf-8") as fp:
                         fp.write(str(best_guess))
@@ -489,9 +489,7 @@ COMMON_SAFE_ASCII_CHARACTERS: Set[str] = {
 KO_NAMES: Set[str] = {"johab", "cp949", "euc_kr"}
 ZH_NAMES: Set[str] = {"big5", "cp950", "big5hkscs", "hz"}
 
-NOT_PRINTABLE_PATTERN = re_compile(r"[0-9\W\n\r\t]+")
-
 LANGUAGE_SUPPORTED_COUNT: int = len(FREQUENCIES)
 
-# Logging LEVEL bellow DEBUG
+# Logging LEVEL below DEBUG
 TRACE: int = 5
@@ -1,9 +1,7 @@
-import warnings
 from typing import Dict, Optional, Union
 
-from .api import from_bytes, from_fp, from_path, normalize
+from .api import from_bytes
 from .constant import CHARDET_CORRESPONDENCE
-from .models import CharsetMatch, CharsetMatches
 
 
 def detect(byte_str: bytes) -> Dict[str, Optional[Union[str, float]]]:

@@ -43,53 +41,3 @@ def detect(byte_str: bytes) -> Dict[str, Optional[Union[str, float]]]:
         "language": language,
         "confidence": confidence,
     }
-
-
-class CharsetNormalizerMatch(CharsetMatch):
-    pass
-
-
-class CharsetNormalizerMatches(CharsetMatches):
-    @staticmethod
-    def from_fp(*args, **kwargs):  # type: ignore
-        warnings.warn(  # pragma: nocover
-            "staticmethod from_fp, from_bytes, from_path and normalize are deprecated "
-            "and scheduled to be removed in 3.0",
-            DeprecationWarning,
-        )
-        return from_fp(*args, **kwargs)  # pragma: nocover
-
-    @staticmethod
-    def from_bytes(*args, **kwargs):  # type: ignore
-        warnings.warn(  # pragma: nocover
-            "staticmethod from_fp, from_bytes, from_path and normalize are deprecated "
-            "and scheduled to be removed in 3.0",
-            DeprecationWarning,
-        )
-        return from_bytes(*args, **kwargs)  # pragma: nocover
-
-    @staticmethod
-    def from_path(*args, **kwargs):  # type: ignore
-        warnings.warn(  # pragma: nocover
-            "staticmethod from_fp, from_bytes, from_path and normalize are deprecated "
-            "and scheduled to be removed in 3.0",
-            DeprecationWarning,
-        )
-        return from_path(*args, **kwargs)  # pragma: nocover
-
-    @staticmethod
-    def normalize(*args, **kwargs):  # type: ignore
-        warnings.warn(  # pragma: nocover
-            "staticmethod from_fp, from_bytes, from_path and normalize are deprecated "
-            "and scheduled to be removed in 3.0",
-            DeprecationWarning,
-        )
-        return normalize(*args, **kwargs)  # pragma: nocover
-
-
-class CharsetDetector(CharsetNormalizerMatches):
-    pass
-
-
-class CharsetDoctor(CharsetNormalizerMatches):
-    pass
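legacy.py now keeps only the chardet-compatible detect() shim; the removed CharsetNormalizerMatch/CharsetNormalizerMatches/CharsetDetector/CharsetDoctor classes were thin deprecated wrappers around the main API. A small sketch of the entry point that stays (the sample bytes are made up):

```python
# Minimal sketch of the chardet-style shim that survives in legacy.py.
from charset_normalizer import detect

result = detect("Übergröße und Straße".encode("cp1252"))  # made-up sample
print(result)  # a dict with "encoding", "language" and "confidence" keys
```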
@@ -1,7 +1,12 @@
 from functools import lru_cache
+from logging import getLogger
 from typing import List, Optional
 
-from .constant import COMMON_SAFE_ASCII_CHARACTERS, UNICODE_SECONDARY_RANGE_KEYWORD
+from .constant import (
+    COMMON_SAFE_ASCII_CHARACTERS,
+    TRACE,
+    UNICODE_SECONDARY_RANGE_KEYWORD,
+)
 from .utils import (
     is_accentuated,
     is_ascii,

@@ -123,7 +128,7 @@ class TooManyAccentuatedPlugin(MessDetectorPlugin):
 
     @property
     def ratio(self) -> float:
-        if self._character_count == 0:
+        if self._character_count == 0 or self._character_count < 8:
            return 0.0
        ratio_of_accentuation: float = self._accentuated_count / self._character_count
        return ratio_of_accentuation if ratio_of_accentuation >= 0.35 else 0.0

@@ -547,7 +552,20 @@ def mess_ratio(
             break
 
     if debug:
+        logger = getLogger("charset_normalizer")
+
+        logger.log(
+            TRACE,
+            "Mess-detector extended-analysis start. "
+            f"intermediary_mean_mess_ratio_calc={intermediary_mean_mess_ratio_calc} mean_mess_ratio={mean_mess_ratio} "
+            f"maximum_threshold={maximum_threshold}",
+        )
+
+        if len(decoded_sequence) > 16:
+            logger.log(TRACE, f"Starting with: {decoded_sequence[:16]}")
+            logger.log(TRACE, f"Ending with: {decoded_sequence[-16::]}")
+
         for dt in detectors:  # pragma: nocover
-            print(dt.__class__, dt.ratio)
+            logger.log(TRACE, f"{dt.__class__}: {dt.ratio}")
 
     return round(mean_mess_ratio, 3)
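The mess detector now reports its per-plugin ratios through the TRACE logger instead of print(), so the extra detail shows up alongside the existing explain output. A hedged way to surface it (TRACE is the numeric level 5 defined in constant.py):

```python
# Hedged example: surfacing the TRACE-level mess-detector output.
from charset_normalizer import from_bytes
from charset_normalizer.utils import set_logging_handler

set_logging_handler(level=5)  # TRACE sits below DEBUG
from_bytes("garbled £$ sample".encode("utf-8"), explain=True)
```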
@@ -1,22 +1,9 @@
-import warnings
-from collections import Counter
 from encodings.aliases import aliases
 from hashlib import sha256
 from json import dumps
-from re import sub
-from typing import (
-    Any,
-    Counter as TypeCounter,
-    Dict,
-    Iterator,
-    List,
-    Optional,
-    Tuple,
-    Union,
-)
+from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
 
-from .constant import NOT_PRINTABLE_PATTERN, TOO_BIG_SEQUENCE
-from .md import mess_ratio
+from .constant import TOO_BIG_SEQUENCE
 from .utils import iana_name, is_multi_byte_encoding, unicode_range
 
 

@@ -65,7 +52,7 @@ class CharsetMatch:
         chaos_difference: float = abs(self.chaos - other.chaos)
         coherence_difference: float = abs(self.coherence - other.coherence)
 
-        # Bellow 1% difference --> Use Coherence
+        # Below 1% difference --> Use Coherence
         if chaos_difference < 0.01 and coherence_difference > 0.02:
             # When having a tough decision, use the result that decoded as many multi-byte as possible.
             if chaos_difference == 0.0 and self.coherence == other.coherence:

@@ -78,45 +65,6 @@ class CharsetMatch:
     def multi_byte_usage(self) -> float:
         return 1.0 - len(str(self)) / len(self.raw)
 
-    @property
-    def chaos_secondary_pass(self) -> float:
-        """
-        Check once again chaos in decoded text, except this time, with full content.
-        Use with caution, this can be very slow.
-        Notice: Will be removed in 3.0
-        """
-        warnings.warn(
-            "chaos_secondary_pass is deprecated and will be removed in 3.0",
-            DeprecationWarning,
-        )
-        return mess_ratio(str(self), 1.0)
-
-    @property
-    def coherence_non_latin(self) -> float:
-        """
-        Coherence ratio on the first non-latin language detected if ANY.
-        Notice: Will be removed in 3.0
-        """
-        warnings.warn(
-            "coherence_non_latin is deprecated and will be removed in 3.0",
-            DeprecationWarning,
-        )
-        return 0.0
-
-    @property
-    def w_counter(self) -> TypeCounter[str]:
-        """
-        Word counter instance on decoded text.
-        Notice: Will be removed in 3.0
-        """
-        warnings.warn(
-            "w_counter is deprecated and will be removed in 3.0", DeprecationWarning
-        )
-
-        string_printable_only = sub(NOT_PRINTABLE_PATTERN, " ", str(self).lower())
-
-        return Counter(string_printable_only.split())
-
     def __str__(self) -> str:
         # Lazy Str Loading
         if self._string is None:

@@ -252,18 +200,6 @@ class CharsetMatch:
         """
         return [self._encoding] + [m.encoding for m in self._leaves]
 
-    def first(self) -> "CharsetMatch":
-        """
-        Kept for BC reasons. Will be removed in 3.0.
-        """
-        return self
-
-    def best(self) -> "CharsetMatch":
-        """
-        Kept for BC reasons. Will be removed in 3.0.
-        """
-        return self
-
     def output(self, encoding: str = "utf_8") -> bytes:
         """
         Method to get re-encoded bytes payload using given target encoding. Default to UTF-8.
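Only per-match conveniences disappear here: CharsetMatch.first()/best() (which just returned self) and the deprecated chaos_secondary_pass/coherence_non_latin/w_counter properties. The container-level CharsetMatches.best(), which is not touched by this diff, still drives the usual pattern:

```python
# The container-level best() is unaffected by these removals.
from charset_normalizer import from_bytes

matches = from_bytes(b"hello world")
best_guess = matches.best()           # still a CharsetMatch or None
if best_guess is not None:
    print(best_guess.encoding, best_guess.output()[:20])
```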
@@ -1,12 +1,6 @@
-try:
-    # WARNING: unicodedata2 support is going to be removed in 3.0
-    # Python is quickly catching up.
-    import unicodedata2 as unicodedata
-except ImportError:
-    import unicodedata  # type: ignore[no-redef]
-
 import importlib
 import logging
+import unicodedata
 from codecs import IncrementalDecoder
 from encodings.aliases import aliases
 from functools import lru_cache

@@ -402,7 +396,7 @@ def cut_sequence_chunks(
 
         # multi-byte bad cutting detector and adjustment
         # not the cleanest way to perform that fix but clever enough for now.
-        if is_multi_byte_decoder and i > 0 and sequences[i] >= 0x80:
+        if is_multi_byte_decoder and i > 0:
 
             chunk_partial_size_chk: int = min(chunk_size, 16)
@@ -2,5 +2,5 @@
 Expose version
 """
 
-__version__ = "2.1.1"
+__version__ = "3.0.1"
 VERSION = __version__.split(".")
@@ -80,8 +80,8 @@ def check_compatibility(urllib3_version, chardet_version, charset_normalizer_ver
     elif charset_normalizer_version:
         major, minor, patch = charset_normalizer_version.split(".")[:3]
         major, minor, patch = int(major), int(minor), int(patch)
-        # charset_normalizer >= 2.0.0 < 3.0.0
-        assert (2, 0, 0) <= (major, minor, patch) < (3, 0, 0)
+        # charset_normalizer >= 2.0.0 < 4.0.0
+        assert (2, 0, 0) <= (major, minor, patch) < (4, 0, 0)
     else:
         raise Exception("You need either charset_normalizer or chardet installed")
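requests' compatibility assert simply widens the accepted charset_normalizer range to anything below 4.0.0; the check is a plain tuple comparison:

```python
# Sketch of the widened version gate, mirroring check_compatibility().
charset_normalizer_version = "3.0.1"
major, minor, patch = (int(x) for x in charset_normalizer_version.split(".")[:3])
assert (2, 0, 0) <= (major, minor, patch) < (4, 0, 0)  # 3.x is now accepted
```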
@@ -5,10 +5,10 @@
 __title__ = "requests"
 __description__ = "Python HTTP for Humans."
 __url__ = "https://requests.readthedocs.io"
-__version__ = "2.28.1"
-__build__ = 0x022801
+__version__ = "2.28.2"
+__build__ = 0x022802
 __author__ = "Kenneth Reitz"
 __author_email__ = "me@kennethreitz.org"
 __license__ = "Apache 2.0"
-__copyright__ = "Copyright 2022 Kenneth Reitz"
+__copyright__ = "Copyright Kenneth Reitz"
 __cake__ = "\u2728 \U0001f370 \u2728"
@@ -438,7 +438,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin):
         if not scheme:
             raise MissingSchema(
                 f"Invalid URL {url!r}: No scheme supplied. "
-                f"Perhaps you meant http://{url}?"
+                f"Perhaps you meant https://{url}?"
             )
 
         if not host:
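Only the hint text changes here (http:// becomes https://); the exception type and trigger are unchanged. A hedged reproduction:

```python
# Hedged reproduction of the updated MissingSchema hint.
import requests
from requests.exceptions import MissingSchema

try:
    requests.get("example.com")  # scheme deliberately omitted
except MissingSchema as exc:
    print(exc)  # "... Perhaps you meant https://example.com?"
```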
@@ -1,2 +1,2 @@
 # This file is protected via CODEOWNERS
-__version__ = "1.26.13"
+__version__ = "1.26.14"
@@ -224,7 +224,7 @@ class AppEngineManager(RequestMethods):
         )
 
         # Check if we should retry the HTTP response.
-        has_retry_after = bool(http_response.getheader("Retry-After"))
+        has_retry_after = bool(http_response.headers.get("Retry-After"))
         if retries.is_retry(method, http_response.status, has_retry_after):
             retries = retries.increment(method, url, response=http_response, _pool=self)
             log.debug("Retry: %s", url)
@@ -69,7 +69,7 @@ class NTLMConnectionPool(HTTPSConnectionPool):
         log.debug("Request headers: %s", headers)
         conn.request("GET", self.authurl, None, headers)
         res = conn.getresponse()
-        reshdr = dict(res.getheaders())
+        reshdr = dict(res.headers)
         log.debug("Response status: %s %s", res.status, res.reason)
         log.debug("Response headers: %s", reshdr)
         log.debug("Response data: %s [...]", res.read(100))

@@ -101,7 +101,7 @@ class NTLMConnectionPool(HTTPSConnectionPool):
         conn.request("GET", self.authurl, None, headers)
         res = conn.getresponse()
         log.debug("Response status: %s %s", res.status, res.reason)
-        log.debug("Response headers: %s", dict(res.getheaders()))
+        log.debug("Response headers: %s", dict(res.headers))
         log.debug("Response data: %s [...]", res.read()[:100])
         if res.status != 200:
             if res.status == 401:
@@ -666,7 +666,7 @@ class HTTPResponse(io.IOBase):
     def getheaders(self):
         warnings.warn(
             "HTTPResponse.getheaders() is deprecated and will be removed "
-            "in urllib3 v2.1.0. Instead access HTTResponse.headers directly.",
+            "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.",
             category=DeprecationWarning,
             stacklevel=2,
         )

@@ -675,7 +675,7 @@ class HTTPResponse(io.IOBase):
     def getheader(self, name, default=None):
         warnings.warn(
             "HTTPResponse.getheader() is deprecated and will be removed "
-            "in urllib3 v2.1.0. Instead use HTTResponse.headers.get(name, default).",
+            "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).",
             category=DeprecationWarning,
             stacklevel=2,
         )
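Both the contrib modules and the deprecation messages now point at HTTPResponse.headers. A hedged example of the preferred access pattern (the URL is illustrative):

```python
# Preferred header access on urllib3 responses; getheader()/getheaders() emit DeprecationWarning.
import urllib3

http = urllib3.PoolManager()
resp = http.request("GET", "https://www.example.com")  # illustrative URL
retry_after = resp.headers.get("Retry-After")           # replaces resp.getheader("Retry-After")
all_headers = dict(resp.headers)                         # replaces dict(res.getheaders())
print(retry_after, len(all_headers))
```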
@@ -63,7 +63,7 @@ IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT + "$")
 BRACELESS_IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT[2:-2] + "$")
 ZONE_ID_RE = re.compile("(" + ZONE_ID_PAT + r")\]$")
 
-_HOST_PORT_PAT = ("^(%s|%s|%s)(?::0*([0-9]{0,5}))?$") % (
+_HOST_PORT_PAT = ("^(%s|%s|%s)(?::0*?(|0|[1-9][0-9]{0,4}))?$") % (
     REG_NAME_PAT,
     IPV4_PAT,
     IPV6_ADDRZ_PAT,
@@ -36,7 +36,7 @@ pyparsing==3.0.9
 python-dateutil==2.8.2
 python-twitter==3.5
 pytz==2022.7
-requests==2.28.1
+requests==2.28.2
 requests-oauthlib==1.3.1
 rumps==0.4.0; platform_system == "Darwin"
 simplejson==3.18.0