Mirror of https://github.com/Tautulli/Tautulli.git (synced 2025-07-07 05:31:15 -07:00)
Bump requests from 2.28.1 to 2.28.2 (#1968)
* Bump requests from 2.28.1 to 2.28.2

Bumps [requests](https://github.com/psf/requests) from 2.28.1 to 2.28.2.
- [Release notes](https://github.com/psf/requests/releases)
- [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md)
- [Commits](https://github.com/psf/requests/compare/v2.28.1...v2.28.2)

---
updated-dependencies:
- dependency-name: requests
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Update requests==2.28.2

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com>

[skip ci]
parent 70e09582da
commit cc78f17be5
20 changed files with 527 additions and 302 deletions
@@ -21,14 +21,8 @@ at <https://github.com/Ousret/charset_normalizer>.
"""
import logging

from .api import from_bytes, from_fp, from_path, normalize
from .legacy import (
    CharsetDetector,
    CharsetDoctor,
    CharsetNormalizerMatch,
    CharsetNormalizerMatches,
    detect,
)
from .api import from_bytes, from_fp, from_path
from .legacy import detect
from .models import CharsetMatch, CharsetMatches
from .utils import set_logging_handler
from .version import VERSION, __version__

@@ -37,14 +31,9 @@ __all__ = (
    "from_fp",
    "from_path",
    "from_bytes",
    "normalize",
    "detect",
    "CharsetMatch",
    "CharsetMatches",
    "CharsetNormalizerMatch",
    "CharsetNormalizerMatches",
    "CharsetDetector",
    "CharsetDoctor",
    "__version__",
    "VERSION",
    "set_logging_handler",
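The hunks above show charset_normalizer 3.x trimming the package's top-level surface to from_bytes/from_fp/from_path plus the chardet-style detect. A minimal sketch against that reduced API (the sample payload is arbitrary):

    from charset_normalizer import from_bytes

    # Arbitrary Cyrillic sample, encoded as cp1251 purely for illustration.
    payload = "Всеки човек има право на образование.".encode("cp1251")

    best_guess = from_bytes(payload).best()  # CharsetMatch or None
    if best_guess is not None:
        print(best_guess.encoding)  # e.g. "cp1251" or a compatible code page
        print(str(best_guess))      # the payload decoded with the guessed encoding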
@@ -1,7 +1,5 @@
import logging
import warnings
from os import PathLike
from os.path import basename, splitext
from typing import Any, BinaryIO, List, Optional, Set

from .cd import (

@@ -41,11 +39,12 @@ def from_bytes(
    cp_exclusion: Optional[List[str]] = None,
    preemptive_behaviour: bool = True,
    explain: bool = False,
    language_threshold: float = 0.1,
) -> CharsetMatches:
    """
    Given a raw bytes sequence, return the best possibles charset usable to render str objects.
    If there is no results, it is a strong indicator that the source is binary/not text.
    By default, the process will extract 5 blocs of 512o each to assess the mess and coherence of a given sequence.
    By default, the process will extract 5 blocks of 512o each to assess the mess and coherence of a given sequence.
    And will give up a particular code page after 20% of measured mess. Those criteria are customizable at will.

    The preemptive behavior DOES NOT replace the traditional detection workflow, it prioritize a particular code page

@@ -197,7 +196,14 @@ def from_bytes(
        if encoding_iana in {"utf_16", "utf_32"} and not bom_or_sig_available:
            logger.log(
                TRACE,
                "Encoding %s wont be tested as-is because it require a BOM. Will try some sub-encoder LE/BE.",
                "Encoding %s won't be tested as-is because it require a BOM. Will try some sub-encoder LE/BE.",
                encoding_iana,
            )
            continue
        if encoding_iana in {"utf_7"} and not bom_or_sig_available:
            logger.log(
                TRACE,
                "Encoding %s won't be tested as-is because detection is unreliable without BOM/SIG.",
                encoding_iana,
            )
            continue

@@ -297,7 +303,13 @@ def from_bytes(
            ):
                md_chunks.append(chunk)

                md_ratios.append(mess_ratio(chunk, threshold))
                md_ratios.append(
                    mess_ratio(
                        chunk,
                        threshold,
                        explain is True and 1 <= len(cp_isolation) <= 2,
                    )
                )

                if md_ratios[-1] >= threshold:
                    early_stop_count += 1

@@ -389,7 +401,9 @@ def from_bytes(
            if encoding_iana != "ascii":
                for chunk in md_chunks:
                    chunk_languages = coherence_ratio(
                        chunk, 0.1, ",".join(target_languages) if target_languages else None
                        chunk,
                        language_threshold,
                        ",".join(target_languages) if target_languages else None,
                    )

                    cd_ratios.append(chunk_languages)

@@ -491,6 +505,7 @@ def from_fp(
    cp_exclusion: Optional[List[str]] = None,
    preemptive_behaviour: bool = True,
    explain: bool = False,
    language_threshold: float = 0.1,
) -> CharsetMatches:
    """
    Same thing than the function from_bytes but using a file pointer that is already ready.

@@ -505,6 +520,7 @@ def from_fp(
        cp_exclusion,
        preemptive_behaviour,
        explain,
        language_threshold,
    )


@@ -517,6 +533,7 @@ def from_path(
    cp_exclusion: Optional[List[str]] = None,
    preemptive_behaviour: bool = True,
    explain: bool = False,
    language_threshold: float = 0.1,
) -> CharsetMatches:
    """
    Same thing than the function from_bytes but with one extra step. Opening and reading given file path in binary mode.

@@ -532,53 +549,5 @@ def from_path(
        cp_exclusion,
        preemptive_behaviour,
        explain,
        language_threshold,
    )


def normalize(
    path: "PathLike[Any]",
    steps: int = 5,
    chunk_size: int = 512,
    threshold: float = 0.20,
    cp_isolation: Optional[List[str]] = None,
    cp_exclusion: Optional[List[str]] = None,
    preemptive_behaviour: bool = True,
) -> CharsetMatch:
    """
    Take a (text-based) file path and try to create another file next to it, this time using UTF-8.
    """
    warnings.warn(
        "normalize is deprecated and will be removed in 3.0",
        DeprecationWarning,
    )

    results = from_path(
        path,
        steps,
        chunk_size,
        threshold,
        cp_isolation,
        cp_exclusion,
        preemptive_behaviour,
    )

    filename = basename(path)
    target_extensions = list(splitext(filename))

    if len(results) == 0:
        raise IOError(
            'Unable to normalize "{}", no encoding charset seems to fit.'.format(
                filename
            )
        )

    result = results.best()

    target_extensions[0] += "-" + result.encoding  # type: ignore

    with open(
        "{}".format(str(path).replace(filename, "".join(target_extensions))), "wb"
    ) as fp:
        fp.write(result.output())  # type: ignore

    return result  # type: ignore
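The api.py hunks add a language_threshold parameter to from_bytes/from_fp/from_path (it replaces the hard-coded 0.1 previously passed to coherence_ratio) and drop the deprecated normalize() helper. A hedged sketch of the new signature in use; the file name is hypothetical:

    from charset_normalizer import from_path

    results = from_path(
        "./subtitles.srt",        # hypothetical input file
        steps=5,                  # number of chunks inspected
        chunk_size=512,           # bytes per chunk
        threshold=0.2,            # maximum tolerated mess ratio
        language_threshold=0.1,   # new: minimum coherence ratio kept per language
    )

    best = results.best()
    print(best.encoding if best else "undetected")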
@@ -1,6 +1,8 @@
# -*- coding: utf-8 -*-
from typing import Dict, List

# Language label that contain the em dash "—"
# character are to be considered alternative seq to origin
FREQUENCIES: Dict[str, List[str]] = {
    "English": [
        "e",

@@ -30,6 +32,34 @@ FREQUENCIES: Dict[str, List[str]] = {
        "z",
        "q",
    ],
    "English—": [
        "e", "a", "t", "i", "o", "n", "s", "r", "h", "l", "d", "c", "m",
        "u", "f", "p", "g", "w", "b", "y", "v", "k", "j", "x", "z", "q",
    ],
    "German": [
        "e",
        "n",

@@ -226,33 +256,303 @@ FREQUENCIES: Dict[str, List[str]] = {
        "ж",
        "ц",
    ],
    # Jap-Kanji
    "Japanese": [
        "人", "一", "大", "亅", "丁", "丨", "竹", "笑", "口", "日", "今", "二", "彳", "行", "十",
        "土", "丶", "寸", "寺", "時", "乙", "丿", "乂", "气", "気", "冂", "巾", "亠", "市", "目",
        "儿", "見", "八", "小", "凵", "県", "月", "彐", "門", "間", "木", "東", "山", "出", "本",
        "中", "刀", "分", "耳", "又", "取", "最", "言", "田", "心", "思", "刂", "前", "京", "尹",
        "事", "生", "厶", "云", "会", "未", "来", "白", "冫", "楽", "灬", "馬", "尸", "尺", "駅",
        "明", "耂", "者", "了", "阝", "都", "高", "卜", "占", "厂", "广", "店", "子", "申", "奄",
        "亻", "俺", "上", "方", "冖", "学", "衣", "艮", "食", "自",
    ],
    # Jap-Katakana
    "Japanese—": [
        "ー", "ン", "ス", "・", "ル", "ト", "リ", "イ", "ア", "ラ", "ッ", "ク", "ド", "シ", "レ",
        "ジ", "タ", "フ", "ロ", "カ", "テ", "マ", "ィ", "グ", "バ", "ム", "プ", "オ", "コ", "デ",
        "ニ", "ウ", "メ", "サ", "ビ", "ナ", "ブ", "ャ", "エ", "ュ", "チ", "キ", "ズ", "ダ", "パ",
        "ミ", "ェ", "ョ", "ハ", "セ", "ベ", "ガ", "モ", "ツ", "ネ", "ボ", "ソ", "ノ", "ァ", "ヴ",
        "ワ", "ポ", "ペ", "ピ", "ケ", "ゴ", "ギ", "ザ", "ホ", "ゲ", "ォ", "ヤ", "ヒ", "ユ", "ヨ",
        "ヘ", "ゼ", "ヌ", "ゥ", "ゾ", "ヶ", "ヂ", "ヲ", "ヅ", "ヵ", "ヱ", "ヰ", "ヮ", "ヽ", "゠",
        "ヾ", "ヷ", "ヿ", "ヸ", "ヹ", "ヺ",
    ],
    # Jap-Hiragana
    "Japanese——": [
        "の", "に", "る", "た", "は", "ー", "と", "は", "し", "い", "を", "で", "て", "が", "い",
        "ン", "れ", "な", "年", "ス", "っ", "ル", "れ", "か", "ら", "あ", "さ", "も", "っ", "り",
        "す", "あ", "も", "こ", "ま", "う", "く", "よ", "き", "ん", "め", "お", "け", "そ", "つ",
        "だ", "や", "え", "ど", "わ", "ち", "み", "せ", "じ", "ば", "へ", "び", "ず", "ろ", "ほ",
        "げ", "む", "べ", "ひ", "ょ", "ゆ", "ぶ", "ご", "ゃ", "ね", "ふ", "ぐ", "ぎ", "ぼ", "ゅ",
        "づ", "ざ", "ぞ", "ぬ", "ぜ", "ぱ", "ぽ", "ぷ", "ぴ", "ぃ", "ぁ", "ぇ", "ぺ", "ゞ", "ぢ",
        "ぉ", "ぅ", "ゐ", "ゝ", "ゑ", "゛", "゜", "ゎ", "ゔ", "゚", "ゟ", "゙", "ゕ", "ゖ",
    ],
    "Portuguese": [
        "a",

@@ -340,6 +640,77 @@ FREQUENCIES: Dict[str, List[str]] = {
        "就", "出", "会", "可", "也", "你", "对", "生", "能", "而", "子", "那", "得", "于", "着",
        "下", "自", "之", "年", "过", "发", "后", "作", "里", "用", "道", "行", "所", "然", "家",
        "种", "事", "成", "方", "多", "经", "么", "去", "法", "学", "如", "都", "同", "现", "当",
        "没", "动", "面", "起", "看", "定", "天", "分", "还", "进", "好", "小", "部", "其", "些",
        "主", "样", "理", "心", "她", "本", "前", "开", "但", "因", "只", "从", "想", "实",
    ],
    "Ukrainian": [
        "о",

@@ -956,34 +1327,6 @@ FREQUENCIES: Dict[str, List[str]] = {
        "ö",
        "y",
    ],
    "Simple English": [
        "e", "a", "t", "i", "o", "n", "s", "r", "h", "l", "d", "c", "m",
        "u", "f", "p", "g", "w", "b", "y", "v", "k", "j", "x", "z", "q",
    ],
    "Thai": [
        "า",
        "น",

@@ -1066,31 +1409,6 @@ FREQUENCIES: Dict[str, List[str]] = {
        "ஒ",
        "ஸ",
    ],
    "Classical Chinese": [
        "之", "年", "為", "也", "以", "一", "人", "其", "者", "國", "有", "二",
        "十", "於", "曰", "三", "不", "大", "而", "子", "中", "五", "四",
    ],
    "Kazakh": [
        "а",
        "ы",
@@ -105,7 +105,7 @@ def mb_encoding_languages(iana_name: str) -> List[str]:
    ):
        return ["Japanese"]
    if iana_name.startswith("gb") or iana_name in ZH_NAMES:
        return ["Chinese", "Classical Chinese"]
        return ["Chinese"]
    if iana_name.startswith("iso2022_kr") or iana_name in KO_NAMES:
        return ["Korean"]

@@ -179,22 +179,45 @@ def characters_popularity_compare(
    character_approved_count: int = 0
    FREQUENCIES_language_set = set(FREQUENCIES[language])

    for character in ordered_characters:
    ordered_characters_count: int = len(ordered_characters)
    target_language_characters_count: int = len(FREQUENCIES[language])

    large_alphabet: bool = target_language_characters_count > 26

    for character, character_rank in zip(
        ordered_characters, range(0, ordered_characters_count)
    ):
        if character not in FREQUENCIES_language_set:
            continue

        character_rank_in_language: int = FREQUENCIES[language].index(character)
        expected_projection_ratio: float = (
            target_language_characters_count / ordered_characters_count
        )
        character_rank_projection: int = int(character_rank * expected_projection_ratio)

        if (
            large_alphabet is False
            and abs(character_rank_projection - character_rank_in_language) > 4
        ):
            continue

        if (
            large_alphabet is True
            and abs(character_rank_projection - character_rank_in_language)
            < target_language_characters_count / 3
        ):
            character_approved_count += 1
            continue

        characters_before_source: List[str] = FREQUENCIES[language][
            0 : FREQUENCIES[language].index(character)
            0:character_rank_in_language
        ]
        characters_after_source: List[str] = FREQUENCIES[language][
            FREQUENCIES[language].index(character) :
        ]
        characters_before: List[str] = ordered_characters[
            0 : ordered_characters.index(character)
        ]
        characters_after: List[str] = ordered_characters[
            ordered_characters.index(character) :
            character_rank_in_language:
        ]
        characters_before: List[str] = ordered_characters[0:character_rank]
        characters_after: List[str] = ordered_characters[character_rank:]

        before_match_count: int = len(
            set(characters_before) & set(characters_before_source)

@@ -289,6 +312,33 @@ def merge_coherence_ratios(results: List[CoherenceMatches]) -> CoherenceMatches:
    return sorted(merge, key=lambda x: x[1], reverse=True)


def filter_alt_coherence_matches(results: CoherenceMatches) -> CoherenceMatches:
    """
    We shall NOT return "English—" in CoherenceMatches because it is an alternative
    of "English". This function only keeps the best match and remove the em-dash in it.
    """
    index_results: Dict[str, List[float]] = dict()

    for result in results:
        language, ratio = result
        no_em_name: str = language.replace("—", "")

        if no_em_name not in index_results:
            index_results[no_em_name] = []

        index_results[no_em_name].append(ratio)

    if any(len(index_results[e]) > 1 for e in index_results):
        filtered_results: CoherenceMatches = []

        for language in index_results:
            filtered_results.append((language, max(index_results[language])))

        return filtered_results

    return results


@lru_cache(maxsize=2048)
def coherence_ratio(
    decoded_sequence: str, threshold: float = 0.1, lg_inclusion: Optional[str] = None

@@ -336,4 +386,6 @@ def coherence_ratio(
        if sufficient_match_count >= 3:
            break

    return sorted(results, key=lambda x: x[1], reverse=True)
    return sorted(
        filter_alt_coherence_matches(results), key=lambda x: x[1], reverse=True
    )
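The new filter_alt_coherence_matches() above folds em-dash variants such as "English—" back into their base language, keeping only the best ratio. A small standalone illustration with made-up ratios (not the library code itself):

    # ("language", ratio) pairs as coherence_ratio() would produce them.
    results = [("English", 0.42), ("English—", 0.57), ("German", 0.31)]

    merged = {}
    for language, ratio in results:
        base = language.replace("—", "")  # strip the alternative-frequency marker
        merged[base] = max(merged.get(base, 0.0), ratio)

    print(sorted(merged.items(), key=lambda x: x[1], reverse=True))
    # [('English', 0.57), ('German', 0.31)], the same outcome the helper returns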
@@ -1,15 +1,12 @@
import argparse
import sys
from json import dumps
from os.path import abspath
from os.path import abspath, basename, dirname, join, realpath
from platform import python_version
from typing import List, Optional
from unicodedata import unidata_version

try:
    from unicodedata2 import unidata_version
except ImportError:
    from unicodedata import unidata_version

import charset_normalizer.md as md_module
from charset_normalizer import from_fp
from charset_normalizer.models import CliDetectionResult
from charset_normalizer.version import __version__

@@ -124,8 +121,11 @@ def cli_detect(argv: Optional[List[str]] = None) -> int:
    parser.add_argument(
        "--version",
        action="version",
        version="Charset-Normalizer {} - Python {} - Unicode {}".format(
            __version__, python_version(), unidata_version
        version="Charset-Normalizer {} - Python {} - Unicode {} - SpeedUp {}".format(
            __version__,
            python_version(),
            unidata_version,
            "OFF" if md_module.__file__.lower().endswith(".py") else "ON",
        ),
        help="Show version information and exit.",
    )

@@ -234,7 +234,10 @@ def cli_detect(argv: Optional[List[str]] = None) -> int:
                my_file.close()
            continue

        o_: List[str] = my_file.name.split(".")
        dir_path = dirname(realpath(my_file.name))
        file_name = basename(realpath(my_file.name))

        o_: List[str] = file_name.split(".")

        if args.replace is False:
            o_.insert(-1, best_guess.encoding)

@@ -255,7 +258,7 @@ def cli_detect(argv: Optional[List[str]] = None) -> int:
                continue

            try:
                x_[0].unicode_path = abspath("./{}".format(".".join(o_)))
                x_[0].unicode_path = join(dir_path, ".".join(o_))

                with open(x_[0].unicode_path, "w", encoding="utf-8") as fp:
                    fp.write(str(best_guess))
@@ -489,9 +489,7 @@ COMMON_SAFE_ASCII_CHARACTERS: Set[str] = {
KO_NAMES: Set[str] = {"johab", "cp949", "euc_kr"}
ZH_NAMES: Set[str] = {"big5", "cp950", "big5hkscs", "hz"}

NOT_PRINTABLE_PATTERN = re_compile(r"[0-9\W\n\r\t]+")

LANGUAGE_SUPPORTED_COUNT: int = len(FREQUENCIES)

# Logging LEVEL bellow DEBUG
# Logging LEVEL below DEBUG
TRACE: int = 5
@@ -1,9 +1,7 @@
import warnings
from typing import Dict, Optional, Union

from .api import from_bytes, from_fp, from_path, normalize
from .api import from_bytes
from .constant import CHARDET_CORRESPONDENCE
from .models import CharsetMatch, CharsetMatches


def detect(byte_str: bytes) -> Dict[str, Optional[Union[str, float]]]:

@@ -43,53 +41,3 @@ def detect(byte_str: bytes) -> Dict[str, Optional[Union[str, float]]]:
        "language": language,
        "confidence": confidence,
    }


class CharsetNormalizerMatch(CharsetMatch):
    pass


class CharsetNormalizerMatches(CharsetMatches):
    @staticmethod
    def from_fp(*args, **kwargs):  # type: ignore
        warnings.warn(  # pragma: nocover
            "staticmethod from_fp, from_bytes, from_path and normalize are deprecated "
            "and scheduled to be removed in 3.0",
            DeprecationWarning,
        )
        return from_fp(*args, **kwargs)  # pragma: nocover

    @staticmethod
    def from_bytes(*args, **kwargs):  # type: ignore
        warnings.warn(  # pragma: nocover
            "staticmethod from_fp, from_bytes, from_path and normalize are deprecated "
            "and scheduled to be removed in 3.0",
            DeprecationWarning,
        )
        return from_bytes(*args, **kwargs)  # pragma: nocover

    @staticmethod
    def from_path(*args, **kwargs):  # type: ignore
        warnings.warn(  # pragma: nocover
            "staticmethod from_fp, from_bytes, from_path and normalize are deprecated "
            "and scheduled to be removed in 3.0",
            DeprecationWarning,
        )
        return from_path(*args, **kwargs)  # pragma: nocover

    @staticmethod
    def normalize(*args, **kwargs):  # type: ignore
        warnings.warn(  # pragma: nocover
            "staticmethod from_fp, from_bytes, from_path and normalize are deprecated "
            "and scheduled to be removed in 3.0",
            DeprecationWarning,
        )
        return normalize(*args, **kwargs)  # pragma: nocover


class CharsetDetector(CharsetNormalizerMatches):
    pass


class CharsetDoctor(CharsetNormalizerMatches):
    pass
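After this change legacy.py keeps only the chardet-compatible detect() wrapper; the CharsetNormalizerMatch/CharsetNormalizerMatches/CharsetDetector/CharsetDoctor aliases are gone. A quick sketch of the surviving entry point (the sample bytes are arbitrary and the exact confidence will vary):

    from charset_normalizer import detect

    result = detect("Bonjour, où êtes-vous ?".encode("utf_8"))
    # Chardet-style mapping, e.g. {'encoding': 'utf_8', 'language': 'French', 'confidence': 1.0}
    print(result["encoding"], result["language"], result["confidence"])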
@@ -1,7 +1,12 @@
from functools import lru_cache
from logging import getLogger
from typing import List, Optional

from .constant import COMMON_SAFE_ASCII_CHARACTERS, UNICODE_SECONDARY_RANGE_KEYWORD
from .constant import (
    COMMON_SAFE_ASCII_CHARACTERS,
    TRACE,
    UNICODE_SECONDARY_RANGE_KEYWORD,
)
from .utils import (
    is_accentuated,
    is_ascii,

@@ -123,7 +128,7 @@ class TooManyAccentuatedPlugin(MessDetectorPlugin):

    @property
    def ratio(self) -> float:
        if self._character_count == 0:
        if self._character_count == 0 or self._character_count < 8:
            return 0.0
        ratio_of_accentuation: float = self._accentuated_count / self._character_count
        return ratio_of_accentuation if ratio_of_accentuation >= 0.35 else 0.0

@@ -547,7 +552,20 @@ def mess_ratio(
            break

    if debug:
        logger = getLogger("charset_normalizer")

        logger.log(
            TRACE,
            "Mess-detector extended-analysis start. "
            f"intermediary_mean_mess_ratio_calc={intermediary_mean_mess_ratio_calc} mean_mess_ratio={mean_mess_ratio} "
            f"maximum_threshold={maximum_threshold}",
        )

        if len(decoded_sequence) > 16:
            logger.log(TRACE, f"Starting with: {decoded_sequence[:16]}")
            logger.log(TRACE, f"Ending with: {decoded_sequence[-16::]}")

        for dt in detectors:  # pragma: nocover
            print(dt.__class__, dt.ratio)
            logger.log(TRACE, f"{dt.__class__}: {dt.ratio}")

    return round(mean_mess_ratio, 3)
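The mess_ratio() hunk replaces the old print() dump with records logged at the new TRACE level (5), and, per the api.py hunk earlier, that extended dump is only produced when explain=True is combined with a cp_isolation list of one or two code pages. A minimal sketch for surfacing those records with the standard logging module:

    import logging

    from charset_normalizer import from_bytes

    # TRACE (5) sits below DEBUG, so opt in explicitly on the library logger.
    logger = logging.getLogger("charset_normalizer")
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(5)

    payload = "très à propos".encode("cp1252")  # arbitrary sample
    from_bytes(payload, explain=True, cp_isolation=["cp1252"])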
@@ -1,22 +1,9 @@
import warnings
from collections import Counter
from encodings.aliases import aliases
from hashlib import sha256
from json import dumps
from re import sub
from typing import (
    Any,
    Counter as TypeCounter,
    Dict,
    Iterator,
    List,
    Optional,
    Tuple,
    Union,
)
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union

from .constant import NOT_PRINTABLE_PATTERN, TOO_BIG_SEQUENCE
from .md import mess_ratio
from .constant import TOO_BIG_SEQUENCE
from .utils import iana_name, is_multi_byte_encoding, unicode_range


@@ -65,7 +52,7 @@ class CharsetMatch:
        chaos_difference: float = abs(self.chaos - other.chaos)
        coherence_difference: float = abs(self.coherence - other.coherence)

        # Bellow 1% difference --> Use Coherence
        # Below 1% difference --> Use Coherence
        if chaos_difference < 0.01 and coherence_difference > 0.02:
            # When having a tough decision, use the result that decoded as many multi-byte as possible.
            if chaos_difference == 0.0 and self.coherence == other.coherence:

@@ -78,45 +65,6 @@ class CharsetMatch:
    def multi_byte_usage(self) -> float:
        return 1.0 - len(str(self)) / len(self.raw)

    @property
    def chaos_secondary_pass(self) -> float:
        """
        Check once again chaos in decoded text, except this time, with full content.
        Use with caution, this can be very slow.
        Notice: Will be removed in 3.0
        """
        warnings.warn(
            "chaos_secondary_pass is deprecated and will be removed in 3.0",
            DeprecationWarning,
        )
        return mess_ratio(str(self), 1.0)

    @property
    def coherence_non_latin(self) -> float:
        """
        Coherence ratio on the first non-latin language detected if ANY.
        Notice: Will be removed in 3.0
        """
        warnings.warn(
            "coherence_non_latin is deprecated and will be removed in 3.0",
            DeprecationWarning,
        )
        return 0.0

    @property
    def w_counter(self) -> TypeCounter[str]:
        """
        Word counter instance on decoded text.
        Notice: Will be removed in 3.0
        """
        warnings.warn(
            "w_counter is deprecated and will be removed in 3.0", DeprecationWarning
        )

        string_printable_only = sub(NOT_PRINTABLE_PATTERN, " ", str(self).lower())

        return Counter(string_printable_only.split())

    def __str__(self) -> str:
        # Lazy Str Loading
        if self._string is None:

@@ -252,18 +200,6 @@ class CharsetMatch:
        """
        return [self._encoding] + [m.encoding for m in self._leaves]

    def first(self) -> "CharsetMatch":
        """
        Kept for BC reasons. Will be removed in 3.0.
        """
        return self

    def best(self) -> "CharsetMatch":
        """
        Kept for BC reasons. Will be removed in 3.0.
        """
        return self

    def output(self, encoding: str = "utf_8") -> bytes:
        """
        Method to get re-encoded bytes payload using given target encoding. Default to UTF-8.
@@ -1,12 +1,6 @@
try:
    # WARNING: unicodedata2 support is going to be removed in 3.0
    # Python is quickly catching up.
    import unicodedata2 as unicodedata
except ImportError:
    import unicodedata  # type: ignore[no-redef]

import importlib
import logging
import unicodedata
from codecs import IncrementalDecoder
from encodings.aliases import aliases
from functools import lru_cache

@@ -402,7 +396,7 @@ def cut_sequence_chunks(

        # multi-byte bad cutting detector and adjustment
        # not the cleanest way to perform that fix but clever enough for now.
        if is_multi_byte_decoder and i > 0 and sequences[i] >= 0x80:
        if is_multi_byte_decoder and i > 0:

            chunk_partial_size_chk: int = min(chunk_size, 16)
@@ -2,5 +2,5 @@
Expose version
"""

__version__ = "2.1.1"
__version__ = "3.0.1"
VERSION = __version__.split(".")
@@ -80,8 +80,8 @@ def check_compatibility(urllib3_version, chardet_version, charset_normalizer_ver
    elif charset_normalizer_version:
        major, minor, patch = charset_normalizer_version.split(".")[:3]
        major, minor, patch = int(major), int(minor), int(patch)
        # charset_normalizer >= 2.0.0 < 3.0.0
        assert (2, 0, 0) <= (major, minor, patch) < (3, 0, 0)
        # charset_normalizer >= 2.0.0 < 4.0.0
        assert (2, 0, 0) <= (major, minor, patch) < (4, 0, 0)
    else:
        raise Exception("You need either charset_normalizer or chardet installed")
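The requests hunk widens the accepted charset_normalizer range from <3.0.0 to <4.0.0; the check itself is a plain tuple comparison. The same test in isolation:

    import charset_normalizer

    major, minor, patch = (int(x) for x in charset_normalizer.__version__.split(".")[:3])

    # Mirrors the widened assertion in requests' check_compatibility().
    assert (2, 0, 0) <= (major, minor, patch) < (4, 0, 0), "unsupported charset_normalizer version"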
@@ -5,10 +5,10 @@
__title__ = "requests"
__description__ = "Python HTTP for Humans."
__url__ = "https://requests.readthedocs.io"
__version__ = "2.28.1"
__build__ = 0x022801
__version__ = "2.28.2"
__build__ = 0x022802
__author__ = "Kenneth Reitz"
__author_email__ = "me@kennethreitz.org"
__license__ = "Apache 2.0"
__copyright__ = "Copyright 2022 Kenneth Reitz"
__copyright__ = "Copyright Kenneth Reitz"
__cake__ = "\u2728 \U0001f370 \u2728"
@@ -438,7 +438,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin):
        if not scheme:
            raise MissingSchema(
                f"Invalid URL {url!r}: No scheme supplied. "
                f"Perhaps you meant http://{url}?"
                f"Perhaps you meant https://{url}?"
            )

        if not host:
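The requests/models hunk only changes the hint in the MissingSchema message from http:// to https://; the exception is raised exactly as before. For reference (the URL is an arbitrary example):

    import requests
    from requests.exceptions import MissingSchema

    try:
        requests.get("example.com/api")  # no scheme supplied
    except MissingSchema as exc:
        print(exc)  # "...No scheme supplied. Perhaps you meant https://example.com/api?"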
@@ -1,2 +1,2 @@
# This file is protected via CODEOWNERS
__version__ = "1.26.13"
__version__ = "1.26.14"
@@ -224,7 +224,7 @@ class AppEngineManager(RequestMethods):
            )

            # Check if we should retry the HTTP response.
            has_retry_after = bool(http_response.getheader("Retry-After"))
            has_retry_after = bool(http_response.headers.get("Retry-After"))
            if retries.is_retry(method, http_response.status, has_retry_after):
                retries = retries.increment(method, url, response=http_response, _pool=self)
                log.debug("Retry: %s", url)
@@ -69,7 +69,7 @@ class NTLMConnectionPool(HTTPSConnectionPool):
        log.debug("Request headers: %s", headers)
        conn.request("GET", self.authurl, None, headers)
        res = conn.getresponse()
        reshdr = dict(res.getheaders())
        reshdr = dict(res.headers)
        log.debug("Response status: %s %s", res.status, res.reason)
        log.debug("Response headers: %s", reshdr)
        log.debug("Response data: %s [...]", res.read(100))

@@ -101,7 +101,7 @@ class NTLMConnectionPool(HTTPSConnectionPool):
        conn.request("GET", self.authurl, None, headers)
        res = conn.getresponse()
        log.debug("Response status: %s %s", res.status, res.reason)
        log.debug("Response headers: %s", dict(res.getheaders()))
        log.debug("Response headers: %s", dict(res.headers))
        log.debug("Response data: %s [...]", res.read()[:100])
        if res.status != 200:
            if res.status == 401:
@@ -666,7 +666,7 @@ class HTTPResponse(io.IOBase):
    def getheaders(self):
        warnings.warn(
            "HTTPResponse.getheaders() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead access HTTResponse.headers directly.",
            "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.",
            category=DeprecationWarning,
            stacklevel=2,
        )

@@ -675,7 +675,7 @@ class HTTPResponse(io.IOBase):
    def getheader(self, name, default=None):
        warnings.warn(
            "HTTPResponse.getheader() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead use HTTResponse.headers.get(name, default).",
            "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).",
            category=DeprecationWarning,
            stacklevel=2,
        )
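Both urllib3 hunks move internal callers off the now-deprecated HTTPResponse.getheader()/getheaders() and onto the headers attribute. Equivalent caller-side usage (the URL is an arbitrary example):

    import urllib3

    http = urllib3.PoolManager()
    resp = http.request("GET", "https://httpbin.org/get")

    # Preferred spelling: read the HTTPHeaderDict instead of calling resp.getheader(...).
    print(resp.headers.get("Content-Type"))
    print(resp.headers.get("Retry-After"))  # None when the header is absent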
@@ -63,7 +63,7 @@ IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT + "$")
BRACELESS_IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT[2:-2] + "$")
ZONE_ID_RE = re.compile("(" + ZONE_ID_PAT + r")\]$")

_HOST_PORT_PAT = ("^(%s|%s|%s)(?::0*([0-9]{0,5}))?$") % (
_HOST_PORT_PAT = ("^(%s|%s|%s)(?::0*?(|0|[1-9][0-9]{0,4}))?$") % (
    REG_NAME_PAT,
    IPV4_PAT,
    IPV6_ADDRZ_PAT,
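The _HOST_PORT_PAT change reworks the optional port group: leading zeros are still stripped by the now lazy 0*? prefix, but an explicit port 0 is captured as "0" instead of collapsing into an empty match. A standalone re sketch with a simplified host pattern (the real pattern splices in REG_NAME_PAT, IPV4_PAT and IPV6_ADDRZ_PAT):

    import re

    # Simplified stand-in for _HOST_PORT_PAT, reusing the new port group verbatim.
    HOST_PORT = re.compile(r"^([^:]+)(?::0*?(|0|[1-9][0-9]{0,4}))?$")

    for candidate in ("example.com:8080", "example.com:0080", "example.com:0", "example.com"):
        match = HOST_PORT.match(candidate)
        print(candidate, "->", match.group(2) if match else None)
    # Captured ports: 8080, 80, 0, None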
@@ -36,7 +36,7 @@ pyparsing==3.0.9
python-dateutil==2.8.2
python-twitter==3.5
pytz==2022.7
requests==2.28.1
requests==2.28.2
requests-oauthlib==1.3.1
rumps==0.4.0; platform_system == "Darwin"
simplejson==3.18.0