Mirror of https://github.com/Tautulli/Tautulli.git (synced 2025-08-14 02:26:58 -07:00)

Update plexapi==4.17.0

commit f6bffe1850 (parent 3cb71f94a3)
32 changed files with 1224 additions and 966 deletions

The hunks shown below cover the vendored charset_normalizer upgrade (3.4.0 to 3.4.2) that ships with this dependency bump. Most of the changes are mechanical: annotations move to PEP 604/585 syntax (str | None, list[str]) guarded by "from __future__ import annotations", old-style .format() calls become f-strings, and IOError becomes OSError.
charset_normalizer/__init__.py

@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 """
 Charset-Normalizer
 ~~~~~~~~~~~~~~
@@ -19,6 +18,9 @@ at <https://github.com/Ousret/charset_normalizer>.
 :copyright: (c) 2021 by Ahmed TAHRI
 :license: MIT, see LICENSE for more details.
 """
+
+from __future__ import annotations
+
 import logging
 
 from .api import from_bytes, from_fp, from_path, is_binary
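Note: nearly every hunk in this diff follows one pattern: Optional[X], Union[...], List, Dict, Set and Tuple from typing are replaced by PEP 604/585 syntax (X | None, list[str], dict[str, int]), kept compatible with older interpreters by the new "from __future__ import annotations" line. A minimal standalone sketch of why that import makes the new syntax safe (the function and its names are made up):

    # With this __future__ import, annotations are stored as strings and never
    # evaluated at runtime, so "list[str] | None" parses fine on Python 3.7+.
    from __future__ import annotations


    def first_or_none(items: list[str] | None) -> str | None:
        # Runtime logic never touches the annotation objects above.
        return items[0] if items else None


    print(first_or_none(["utf_8", "cp1252"]))  # utf_8
    print(first_or_none(None))                 # None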
charset_normalizer/__main__.py

@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from .cli import cli_detect
 
 if __name__ == "__main__":
charset_normalizer/api.py

@@ -1,6 +1,8 @@
+from __future__ import annotations
+
 import logging
 from os import PathLike
-from typing import BinaryIO, List, Optional, Set, Union
+from typing import BinaryIO
 
 from .cd import (
     coherence_ratio,
@@ -21,8 +23,6 @@ from .utils import (
     should_strip_sig_or_bom,
 )
 
-# Will most likely be controversial
-# logging.addLevelName(TRACE, "TRACE")
 logger = logging.getLogger("charset_normalizer")
 explain_handler = logging.StreamHandler()
 explain_handler.setFormatter(
@@ -31,12 +31,12 @@ explain_handler.setFormatter(
 
 
 def from_bytes(
-    sequences: Union[bytes, bytearray],
+    sequences: bytes | bytearray,
     steps: int = 5,
     chunk_size: int = 512,
     threshold: float = 0.2,
-    cp_isolation: Optional[List[str]] = None,
-    cp_exclusion: Optional[List[str]] = None,
+    cp_isolation: list[str] | None = None,
+    cp_exclusion: list[str] | None = None,
     preemptive_behaviour: bool = True,
     explain: bool = False,
     language_threshold: float = 0.1,
@@ -62,7 +62,7 @@ def from_bytes(
 
     if not isinstance(sequences, (bytearray, bytes)):
         raise TypeError(
-            "Expected object of type bytes or bytearray, got: {0}".format(
+            "Expected object of type bytes or bytearray, got: {}".format(
                 type(sequences)
             )
         )
@@ -76,7 +76,7 @@ def from_bytes(
 
     if length == 0:
         logger.debug("Encoding detection on empty bytes, assuming utf_8 intention.")
-        if explain:
+        if explain:  # Defensive: ensure exit path clean handler
            logger.removeHandler(explain_handler)
            logger.setLevel(previous_logger_level or logging.WARNING)
        return CharsetMatches([CharsetMatch(sequences, "utf_8", 0.0, False, [], "")])
@@ -135,9 +135,9 @@ def from_bytes(
         ),
     )
 
-    prioritized_encodings: List[str] = []
+    prioritized_encodings: list[str] = []
 
-    specified_encoding: Optional[str] = (
+    specified_encoding: str | None = (
         any_specified_encoding(sequences) if preemptive_behaviour else None
     )
 
@@ -149,13 +149,13 @@ def from_bytes(
             specified_encoding,
         )
 
-    tested: Set[str] = set()
-    tested_but_hard_failure: List[str] = []
-    tested_but_soft_failure: List[str] = []
+    tested: set[str] = set()
+    tested_but_hard_failure: list[str] = []
+    tested_but_soft_failure: list[str] = []
 
-    fallback_ascii: Optional[CharsetMatch] = None
-    fallback_u8: Optional[CharsetMatch] = None
-    fallback_specified: Optional[CharsetMatch] = None
+    fallback_ascii: CharsetMatch | None = None
+    fallback_u8: CharsetMatch | None = None
+    fallback_specified: CharsetMatch | None = None
 
     results: CharsetMatches = CharsetMatches()
 
@@ -189,7 +189,7 @@ def from_bytes(
 
         tested.add(encoding_iana)
 
-        decoded_payload: Optional[str] = None
+        decoded_payload: str | None = None
         bom_or_sig_available: bool = sig_encoding == encoding_iana
         strip_sig_or_bom: bool = bom_or_sig_available and should_strip_sig_or_bom(
             encoding_iana
@@ -292,7 +292,7 @@ def from_bytes(
         early_stop_count: int = 0
         lazy_str_hard_failure = False
 
-        md_chunks: List[str] = []
+        md_chunks: list[str] = []
         md_ratios = []
 
         try:
@@ -397,7 +397,7 @@ def from_bytes(
             )
 
             if not is_multi_byte_decoder:
-                target_languages: List[str] = encoding_languages(encoding_iana)
+                target_languages: list[str] = encoding_languages(encoding_iana)
             else:
                 target_languages = mb_encoding_languages(encoding_iana)
 
@@ -462,7 +462,7 @@ def from_bytes(
                     "Encoding detection: %s is most likely the one.",
                     current_match.encoding,
                 )
-                if explain:
+                if explain:  # Defensive: ensure exit path clean handler
                     logger.removeHandler(explain_handler)
                     logger.setLevel(previous_logger_level)
                 return CharsetMatches([current_match])
@@ -480,7 +480,7 @@ def from_bytes(
                 "Encoding detection: %s is most likely the one.",
                 probable_result.encoding,
             )
-            if explain:
+            if explain:  # Defensive: ensure exit path clean handler
                 logger.removeHandler(explain_handler)
                 logger.setLevel(previous_logger_level)
 
@@ -492,7 +492,7 @@ def from_bytes(
                 "the beginning of the sequence.",
                 encoding_iana,
             )
-            if explain:
+            if explain:  # Defensive: ensure exit path clean handler
                 logger.removeHandler(explain_handler)
                 logger.setLevel(previous_logger_level)
             return CharsetMatches([results[encoding_iana]])
@@ -546,8 +546,8 @@ def from_fp(
     steps: int = 5,
     chunk_size: int = 512,
     threshold: float = 0.20,
-    cp_isolation: Optional[List[str]] = None,
-    cp_exclusion: Optional[List[str]] = None,
+    cp_isolation: list[str] | None = None,
+    cp_exclusion: list[str] | None = None,
     preemptive_behaviour: bool = True,
     explain: bool = False,
     language_threshold: float = 0.1,
@@ -572,12 +572,12 @@ def from_fp(
 
 
 def from_path(
-    path: Union[str, bytes, PathLike],  # type: ignore[type-arg]
+    path: str | bytes | PathLike,  # type: ignore[type-arg]
     steps: int = 5,
     chunk_size: int = 512,
     threshold: float = 0.20,
-    cp_isolation: Optional[List[str]] = None,
-    cp_exclusion: Optional[List[str]] = None,
+    cp_isolation: list[str] | None = None,
+    cp_exclusion: list[str] | None = None,
     preemptive_behaviour: bool = True,
     explain: bool = False,
     language_threshold: float = 0.1,
@@ -603,12 +603,12 @@ def from_path(
 
 
 def is_binary(
-    fp_or_path_or_payload: Union[PathLike, str, BinaryIO, bytes],  # type: ignore[type-arg]
+    fp_or_path_or_payload: PathLike | str | BinaryIO | bytes,  # type: ignore[type-arg]
     steps: int = 5,
     chunk_size: int = 512,
     threshold: float = 0.20,
-    cp_isolation: Optional[List[str]] = None,
-    cp_exclusion: Optional[List[str]] = None,
+    cp_isolation: list[str] | None = None,
+    cp_exclusion: list[str] | None = None,
     preemptive_behaviour: bool = True,
     explain: bool = False,
     language_threshold: float = 0.1,
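Note: the api.py changes are annotation and style only; the detection logic and the public defaults are untouched, so existing callers need no changes. A hedged sketch of the public entry point these signatures belong to (the sample payload is made up):

    from charset_normalizer import from_bytes

    # A byte payload in a legacy code page (sample text is made up).
    payload = "Bonjour, où est la gare ?".encode("cp1252")

    matches = from_bytes(payload)  # CharsetMatches container
    best = matches.best()          # best() returns None when nothing fits

    if best is not None:
        print(best.encoding)  # cp1252 or an equivalent code page
        print(str(best))      # the decoded text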
charset_normalizer/cd.py

@@ -1,8 +1,10 @@
+from __future__ import annotations
+
 import importlib
 from codecs import IncrementalDecoder
 from collections import Counter
 from functools import lru_cache
-from typing import Counter as TypeCounter, Dict, List, Optional, Tuple
+from typing import Counter as TypeCounter
 
 from .constant import (
     FREQUENCIES,
@@ -22,26 +24,24 @@ from .utils import (
 )
 
 
-def encoding_unicode_range(iana_name: str) -> List[str]:
+def encoding_unicode_range(iana_name: str) -> list[str]:
     """
     Return associated unicode ranges in a single byte code page.
     """
     if is_multi_byte_encoding(iana_name):
-        raise IOError("Function not supported on multi-byte code page")
+        raise OSError("Function not supported on multi-byte code page")
 
-    decoder = importlib.import_module(
-        "encodings.{}".format(iana_name)
-    ).IncrementalDecoder
+    decoder = importlib.import_module(f"encodings.{iana_name}").IncrementalDecoder
 
     p: IncrementalDecoder = decoder(errors="ignore")
-    seen_ranges: Dict[str, int] = {}
+    seen_ranges: dict[str, int] = {}
     character_count: int = 0
 
     for i in range(0x40, 0xFF):
         chunk: str = p.decode(bytes([i]))
 
         if chunk:
-            character_range: Optional[str] = unicode_range(chunk)
+            character_range: str | None = unicode_range(chunk)
 
             if character_range is None:
                 continue
@@ -61,11 +61,11 @@ def encoding_unicode_range(iana_name: str) -> List[str]:
     )
 
 
-def unicode_range_languages(primary_range: str) -> List[str]:
+def unicode_range_languages(primary_range: str) -> list[str]:
     """
     Return inferred languages used with a unicode range.
     """
-    languages: List[str] = []
+    languages: list[str] = []
 
     for language, characters in FREQUENCIES.items():
         for character in characters:
@@ -77,13 +77,13 @@ def unicode_range_languages(primary_range: str) -> List[str]:
 
 
 @lru_cache()
-def encoding_languages(iana_name: str) -> List[str]:
+def encoding_languages(iana_name: str) -> list[str]:
     """
     Single-byte encoding language association. Some code page are heavily linked to particular language(s).
     This function does the correspondence.
     """
-    unicode_ranges: List[str] = encoding_unicode_range(iana_name)
-    primary_range: Optional[str] = None
+    unicode_ranges: list[str] = encoding_unicode_range(iana_name)
+    primary_range: str | None = None
 
     for specified_range in unicode_ranges:
         if "Latin" not in specified_range:
@@ -97,7 +97,7 @@ def encoding_languages(iana_name: str) -> List[str]:
 
 
 @lru_cache()
-def mb_encoding_languages(iana_name: str) -> List[str]:
+def mb_encoding_languages(iana_name: str) -> list[str]:
     """
     Multi-byte encoding language association. Some code page are heavily linked to particular language(s).
     This function does the correspondence.
@@ -118,7 +118,7 @@ def mb_encoding_languages(iana_name: str) -> List[str]:
 
 
 @lru_cache(maxsize=LANGUAGE_SUPPORTED_COUNT)
-def get_target_features(language: str) -> Tuple[bool, bool]:
+def get_target_features(language: str) -> tuple[bool, bool]:
     """
     Determine main aspects from a supported language if it contains accents and if is pure Latin.
     """
@@ -135,12 +135,12 @@ def get_target_features(language: str) -> Tuple[bool, bool]:
 
 
 def alphabet_languages(
-    characters: List[str], ignore_non_latin: bool = False
-) -> List[str]:
+    characters: list[str], ignore_non_latin: bool = False
+) -> list[str]:
     """
     Return associated languages associated to given characters.
     """
-    languages: List[Tuple[str, float]] = []
+    languages: list[tuple[str, float]] = []
 
     source_have_accents = any(is_accentuated(character) for character in characters)
 
@@ -170,7 +170,7 @@ def alphabet_languages(
 
 
 def characters_popularity_compare(
-    language: str, ordered_characters: List[str]
+    language: str, ordered_characters: list[str]
 ) -> float:
     """
     Determine if a ordered characters list (by occurrence from most appearance to rarest) match a particular language.
@@ -178,7 +178,7 @@ def characters_popularity_compare(
     Beware that is function is not strict on the match in order to ease the detection. (Meaning close match is 1.)
     """
     if language not in FREQUENCIES:
-        raise ValueError("{} not available".format(language))
+        raise ValueError(f"{language} not available")
 
     character_approved_count: int = 0
     FREQUENCIES_language_set = set(FREQUENCIES[language])
@@ -214,14 +214,14 @@ def characters_popularity_compare(
             character_approved_count += 1
             continue
 
-        characters_before_source: List[str] = FREQUENCIES[language][
+        characters_before_source: list[str] = FREQUENCIES[language][
             0:character_rank_in_language
         ]
-        characters_after_source: List[str] = FREQUENCIES[language][
+        characters_after_source: list[str] = FREQUENCIES[language][
             character_rank_in_language:
         ]
-        characters_before: List[str] = ordered_characters[0:character_rank]
-        characters_after: List[str] = ordered_characters[character_rank:]
+        characters_before: list[str] = ordered_characters[0:character_rank]
+        characters_after: list[str] = ordered_characters[character_rank:]
 
         before_match_count: int = len(
             set(characters_before) & set(characters_before_source)
@@ -249,24 +249,24 @@ def characters_popularity_compare(
     return character_approved_count / len(ordered_characters)
 
 
-def alpha_unicode_split(decoded_sequence: str) -> List[str]:
+def alpha_unicode_split(decoded_sequence: str) -> list[str]:
     """
     Given a decoded text sequence, return a list of str. Unicode range / alphabet separation.
     Ex. a text containing English/Latin with a bit a Hebrew will return two items in the resulting list;
     One containing the latin letters and the other hebrew.
     """
-    layers: Dict[str, str] = {}
+    layers: dict[str, str] = {}
 
     for character in decoded_sequence:
         if character.isalpha() is False:
             continue
 
-        character_range: Optional[str] = unicode_range(character)
+        character_range: str | None = unicode_range(character)
 
         if character_range is None:
             continue
 
-        layer_target_range: Optional[str] = None
+        layer_target_range: str | None = None
 
         for discovered_range in layers:
             if (
@@ -288,12 +288,12 @@ def alpha_unicode_split(decoded_sequence: str) -> List[str]:
     return list(layers.values())
 
 
-def merge_coherence_ratios(results: List[CoherenceMatches]) -> CoherenceMatches:
+def merge_coherence_ratios(results: list[CoherenceMatches]) -> CoherenceMatches:
     """
     This function merge results previously given by the function coherence_ratio.
     The return type is the same as coherence_ratio.
     """
-    per_language_ratios: Dict[str, List[float]] = {}
+    per_language_ratios: dict[str, list[float]] = {}
     for result in results:
         for sub_result in result:
             language, ratio = sub_result
@@ -321,7 +321,7 @@ def filter_alt_coherence_matches(results: CoherenceMatches) -> CoherenceMatches:
     We shall NOT return "English—" in CoherenceMatches because it is an alternative
     of "English". This function only keeps the best match and remove the em-dash in it.
     """
-    index_results: Dict[str, List[float]] = dict()
+    index_results: dict[str, list[float]] = dict()
 
     for result in results:
         language, ratio = result
@@ -345,14 +345,14 @@ def filter_alt_coherence_matches(results: CoherenceMatches) -> CoherenceMatches:
 
 @lru_cache(maxsize=2048)
 def coherence_ratio(
-    decoded_sequence: str, threshold: float = 0.1, lg_inclusion: Optional[str] = None
+    decoded_sequence: str, threshold: float = 0.1, lg_inclusion: str | None = None
 ) -> CoherenceMatches:
     """
     Detect ANY language that can be identified in given sequence. The sequence will be analysed by layers.
     A layer = Character extraction by alphabets/ranges.
     """
 
-    results: List[Tuple[str, float]] = []
+    results: list[tuple[str, float]] = []
     ignore_non_latin: bool = False
 
     sufficient_match_count: int = 0
@@ -371,7 +371,7 @@ def coherence_ratio(
         if character_count <= TOO_SMALL_SEQUENCE:
             continue
 
-        popular_character_ordered: List[str] = [c for c, o in most_common]
+        popular_character_ordered: list[str] = [c for c, o in most_common]
 
         for language in lg_inclusion_list or alphabet_languages(
             popular_character_ordered, ignore_non_latin
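Note: the raise IOError to raise OSError change in encoding_unicode_range() is purely cosmetic: IOError has been an alias of OSError since Python 3.3, so existing "except IOError" handlers keep working. A quick standalone demonstration:

    # IOError and OSError are the same class object on Python 3.3+.
    assert IOError is OSError

    try:
        raise OSError("Function not supported on multi-byte code page")
    except IOError as exc:  # still catches, because it is the same class
        print(type(exc).__name__)  # OSError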
charset_normalizer/cli/__init__.py

@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from .__main__ import cli_detect, query_yes_no
 
 __all__ = (
charset_normalizer/cli/__main__.py

@@ -1,9 +1,11 @@
+from __future__ import annotations
+
 import argparse
 import sys
+import typing
 from json import dumps
 from os.path import abspath, basename, dirname, join, realpath
 from platform import python_version
-from typing import List, Optional
 from unicodedata import unidata_version
 
 import charset_normalizer.md as md_module
@@ -42,10 +44,69 @@ def query_yes_no(question: str, default: str = "yes") -> bool:
     elif choice in valid:
         return valid[choice]
     else:
-        sys.stdout.write("Please respond with 'yes' or 'no' " "(or 'y' or 'n').\n")
+        sys.stdout.write("Please respond with 'yes' or 'no' (or 'y' or 'n').\n")
 
 
-def cli_detect(argv: Optional[List[str]] = None) -> int:
+class FileType:
+    """Factory for creating file object types
+
+    Instances of FileType are typically passed as type= arguments to the
+    ArgumentParser add_argument() method.
+
+    Keyword Arguments:
+        - mode -- A string indicating how the file is to be opened. Accepts the
+            same values as the builtin open() function.
+        - bufsize -- The file's desired buffer size. Accepts the same values as
+            the builtin open() function.
+        - encoding -- The file's encoding. Accepts the same values as the
+            builtin open() function.
+        - errors -- A string indicating how encoding and decoding errors are to
+            be handled. Accepts the same value as the builtin open() function.
+
+    Backported from CPython 3.12
+    """
+
+    def __init__(
+        self,
+        mode: str = "r",
+        bufsize: int = -1,
+        encoding: str | None = None,
+        errors: str | None = None,
+    ):
+        self._mode = mode
+        self._bufsize = bufsize
+        self._encoding = encoding
+        self._errors = errors
+
+    def __call__(self, string: str) -> typing.IO:  # type: ignore[type-arg]
+        # the special argument "-" means sys.std{in,out}
+        if string == "-":
+            if "r" in self._mode:
+                return sys.stdin.buffer if "b" in self._mode else sys.stdin
+            elif any(c in self._mode for c in "wax"):
+                return sys.stdout.buffer if "b" in self._mode else sys.stdout
+            else:
+                msg = f'argument "-" with mode {self._mode}'
+                raise ValueError(msg)
+
+        # all other arguments are used as file names
+        try:
+            return open(string, self._mode, self._bufsize, self._encoding, self._errors)
+        except OSError as e:
+            message = f"can't open '{string}': {e}"
+            raise argparse.ArgumentTypeError(message)
+
+    def __repr__(self) -> str:
+        args = self._mode, self._bufsize
+        kwargs = [("encoding", self._encoding), ("errors", self._errors)]
+        args_str = ", ".join(
+            [repr(arg) for arg in args if arg != -1]
+            + [f"{kw}={arg!r}" for kw, arg in kwargs if arg is not None]
+        )
+        return f"{type(self).__name__}({args_str})"
+
+
+def cli_detect(argv: list[str] | None = None) -> int:
     """
     CLI assistant using ARGV and ArgumentParser
     :param argv:
@@ -58,7 +119,7 @@ def cli_detect(argv: Optional[List[str]] = None) -> int:
     )
 
     parser.add_argument(
-        "files", type=argparse.FileType("rb"), nargs="+", help="File(s) to be analysed"
+        "files", type=FileType("rb"), nargs="+", help="File(s) to be analysed"
     )
     parser.add_argument(
         "-v",
@@ -124,7 +185,7 @@ def cli_detect(argv: Optional[List[str]] = None) -> int:
         default=0.2,
         type=float,
         dest="threshold",
-        help="Define a custom maximum amount of chaos allowed in decoded content. 0. <= chaos <= 1.",
+        help="Define a custom maximum amount of noise allowed in decoded content. 0. <= noise <= 1.",
     )
     parser.add_argument(
         "--version",
@@ -259,7 +320,7 @@ def cli_detect(argv: Optional[List[str]] = None) -> int:
                 dir_path = dirname(realpath(my_file.name))
                 file_name = basename(realpath(my_file.name))
 
-                o_: List[str] = file_name.split(".")
+                o_: list[str] = file_name.split(".")
 
                 if args.replace is False:
                     o_.insert(-1, best_guess.encoding)
@@ -284,7 +345,7 @@ def cli_detect(argv: Optional[List[str]] = None) -> int:
 
                     with open(x_[0].unicode_path, "wb") as fp:
                         fp.write(best_guess.output())
-                except IOError as e:
+                except OSError as e:
                     print(str(e), file=sys.stderr)
                     if my_file.closed is False:
                         my_file.close()
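Note: the FileType class is copied from CPython's argparse (the docstring says 3.12), presumably because argparse.FileType is deprecated in newer CPython releases. It behaves like the original, including "-" for stdin/stdout. A usage sketch; the import path below is an assumption based on where the hunk places the class:

    import argparse

    # Assumed import path: the class is defined in charset_normalizer/cli/__main__.py.
    from charset_normalizer.cli.__main__ import FileType

    parser = argparse.ArgumentParser()
    parser.add_argument("files", type=FileType("rb"), nargs="+")

    # "-" maps to sys.stdin (its binary buffer here, because of mode "rb").
    args = parser.parse_args(["-"])
    print(args.files[0])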
charset_normalizer/constant.py

@@ -1,11 +1,12 @@
-# -*- coding: utf-8 -*-
+from __future__ import annotations
+
 from codecs import BOM_UTF8, BOM_UTF16_BE, BOM_UTF16_LE, BOM_UTF32_BE, BOM_UTF32_LE
 from encodings.aliases import aliases
-from re import IGNORECASE, compile as re_compile
-from typing import Dict, List, Set, Union
+from re import IGNORECASE
+from re import compile as re_compile
 
 # Contain for each eligible encoding a list of/item bytes SIG/BOM
-ENCODING_MARKS: Dict[str, Union[bytes, List[bytes]]] = {
+ENCODING_MARKS: dict[str, bytes | list[bytes]] = {
     "utf_8": BOM_UTF8,
     "utf_7": [
         b"\x2b\x2f\x76\x38",
@@ -25,7 +26,7 @@ TOO_BIG_SEQUENCE: int = int(10e6)
 UTF8_MAXIMAL_ALLOCATION: int = 1_112_064
 
 # Up-to-date Unicode ucd/15.0.0
-UNICODE_RANGES_COMBINED: Dict[str, range] = {
+UNICODE_RANGES_COMBINED: dict[str, range] = {
     "Control character": range(32),
     "Basic Latin": range(32, 128),
     "Latin-1 Supplement": range(128, 256),
@@ -357,7 +358,7 @@ UNICODE_RANGES_COMBINED: Dict[str, range] = {
 }
 
 
-UNICODE_SECONDARY_RANGE_KEYWORD: List[str] = [
+UNICODE_SECONDARY_RANGE_KEYWORD: list[str] = [
     "Supplement",
     "Extended",
     "Extensions",
@@ -392,7 +393,7 @@ IANA_NO_ALIASES = [
     "koi8_u",
 ]
 
-IANA_SUPPORTED: List[str] = sorted(
+IANA_SUPPORTED: list[str] = sorted(
     filter(
         lambda x: x.endswith("_codec") is False
         and x not in {"rot_13", "tactis", "mbcs"},
@@ -403,7 +404,7 @@ IANA_SUPPORTED: List[str] = sorted(
 IANA_SUPPORTED_COUNT: int = len(IANA_SUPPORTED)
 
 # pre-computed code page that are similar using the function cp_similarity.
-IANA_SUPPORTED_SIMILAR: Dict[str, List[str]] = {
+IANA_SUPPORTED_SIMILAR: dict[str, list[str]] = {
     "cp037": ["cp1026", "cp1140", "cp273", "cp500"],
     "cp1026": ["cp037", "cp1140", "cp273", "cp500"],
     "cp1125": ["cp866"],
@@ -492,7 +493,7 @@ IANA_SUPPORTED_SIMILAR: Dict[str, List[str]] = {
 }
 
 
-CHARDET_CORRESPONDENCE: Dict[str, str] = {
+CHARDET_CORRESPONDENCE: dict[str, str] = {
     "iso2022_kr": "ISO-2022-KR",
     "iso2022_jp": "ISO-2022-JP",
     "euc_kr": "EUC-KR",
@@ -528,7 +529,7 @@ CHARDET_CORRESPONDENCE: Dict[str, str] = {
 }
 
 
-COMMON_SAFE_ASCII_CHARACTERS: Set[str] = {
+COMMON_SAFE_ASCII_CHARACTERS: set[str] = {
     "<",
     ">",
     "=",
@@ -548,9 +549,26 @@ COMMON_SAFE_ASCII_CHARACTERS: Set[str] = {
     ")",
 }
 
+# Sample character sets — replace with full lists if needed
+COMMON_CHINESE_CHARACTERS = "的一是在不了有和人这中大为上个国我以要他时来用们生到作地于出就分对成会可主发年动同工也能下过子说产种面而方后多定行学法所民得经十三之进着等部度家电力里如水化高自二理起小物现实加量都两体制机当使点从业本去把性好应开它合还因由其些然前外天政四日那社义事平形相全表间样与关各重新线内数正心反你明看原又么利比或但质气第向道命此变条只没结解问意建月公无系军很情者最立代想已通并提直题党程展五果料象员革位入常文总次品式活设及管特件长求老头基资边流路级少图山统接知较将组见计别她手角期根论运农指几九区强放决西被干做必战先回则任取据处队南给色光门即保治北造百规热领七海口东导器压志世金增争济阶油思术极交受联什认六共权收证改清己美再采转更单风切打白教速花带安场身车例真务具万每目至达走积示议声报斗完类八离华名确才科张信马节话米整空元况今集温传土许步群广石记需段研界拉林律叫且究观越织装影算低持音众书布复容儿须际商非验连断深难近矿千周委素技备半办青省列习响约支般史感劳便团往酸历市克何除消构府太准精值号率族维划选标写存候毛亲快效斯院查江型眼王按格养易置派层片始却专状育厂京识适属圆包火住调满县局照参红细引听该铁价严龙飞"
 
-KO_NAMES: Set[str] = {"johab", "cp949", "euc_kr"}
-ZH_NAMES: Set[str] = {"big5", "cp950", "big5hkscs", "hz"}
+COMMON_JAPANESE_CHARACTERS = "日一国年大十二本中長出三時行見月分後前生五間上東四今金九入学高円子外八六下来気小七山話女北午百書先名川千水半男西電校語土木聞食車何南万毎白天母火右読友左休父雨"
+
+COMMON_KOREAN_CHARACTERS = "一二三四五六七八九十百千萬上下左右中人女子大小山川日月火水木金土父母天地國名年時文校學生"
+
+# Combine all into a set
+COMMON_CJK_CHARACTERS = set(
+    "".join(
+        [
+            COMMON_CHINESE_CHARACTERS,
+            COMMON_JAPANESE_CHARACTERS,
+            COMMON_KOREAN_CHARACTERS,
+        ]
+    )
+)
+
+KO_NAMES: set[str] = {"johab", "cp949", "euc_kr"}
+ZH_NAMES: set[str] = {"big5", "cp950", "big5hkscs", "hz"}
 
 # Logging LEVEL below DEBUG
 TRACE: int = 5
@@ -558,7 +576,7 @@ TRACE: int = 5
 
 # Language label that contain the em dash "—"
 # character are to be considered alternative seq to origin
-FREQUENCIES: Dict[str, List[str]] = {
+FREQUENCIES: dict[str, list[str]] = {
     "English": [
         "e",
         "a",
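Note: the three new character strings are frequency-ordered lists of common hanzi, kanji and hanja; folding them into a single set makes the per-character lookup used by the new mess-detector plugin an O(1) membership test. A trimmed-down sketch of the construction in this hunk (the real constants hold the full strings):

    # Truncated samples; the vendored constants carry the complete lists.
    COMMON_CHINESE_CHARACTERS = "的一是在不了"
    COMMON_JAPANESE_CHARACTERS = "日一国年大十"
    COMMON_KOREAN_CHARACTERS = "一二三四五六"

    COMMON_CJK_CHARACTERS = set(
        "".join(
            [
                COMMON_CHINESE_CHARACTERS,
                COMMON_JAPANESE_CHARACTERS,
                COMMON_KOREAN_CHARACTERS,
            ]
        )
    )

    print("的" in COMMON_CJK_CHARACTERS)  # True: common character
    print("丄" in COMMON_CJK_CHARACTERS)  # False: uncommon form, absent here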
charset_normalizer/legacy.py

@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any, Optional
+from typing import TYPE_CHECKING, Any
 from warnings import warn
 
 from .api import from_bytes
@@ -11,9 +11,9 @@ if TYPE_CHECKING:
     from typing_extensions import TypedDict
 
     class ResultDict(TypedDict):
-        encoding: Optional[str]
+        encoding: str | None
         language: str
-        confidence: Optional[float]
+        confidence: float | None
 
 
 def detect(
@@ -37,8 +37,7 @@ def detect(
 
     if not isinstance(byte_str, (bytearray, bytes)):
         raise TypeError(  # pragma: nocover
-            "Expected object of type bytes or bytearray, got: "
-            "{0}".format(type(byte_str))
+            f"Expected object of type bytes or bytearray, got: {type(byte_str)}"
         )
 
     if isinstance(byte_str, bytearray):
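Note: legacy.py still provides the chardet-compatible detect() shim; only the typing and the error message formatting changed, so drop-in usage is unaffected. Exact output values depend on the input:

    from charset_normalizer import detect

    # chardet-style result dict with encoding / language / confidence keys.
    result = detect("こんにちは".encode("utf_8"))
    print(result["encoding"], result["confidence"], result["language"])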
charset_normalizer/md.py

@@ -1,6 +1,7 @@
+from __future__ import annotations
+
 from functools import lru_cache
 from logging import getLogger
-from typing import List, Optional
 
 from .constant import (
     COMMON_SAFE_ASCII_CHARACTERS,
@@ -25,6 +26,7 @@ from .utils import (
     is_unprintable,
     remove_accent,
     unicode_range,
+    is_cjk_uncommon,
 )
 
 
@@ -68,7 +70,7 @@ class TooManySymbolOrPunctuationPlugin(MessDetectorPlugin):
         self._symbol_count: int = 0
         self._character_count: int = 0
 
-        self._last_printable_char: Optional[str] = None
+        self._last_printable_char: str | None = None
         self._frenzy_symbol_in_word: bool = False
 
     def eligible(self, character: str) -> bool:
@@ -92,7 +94,7 @@ class TooManySymbolOrPunctuationPlugin(MessDetectorPlugin):
 
         self._last_printable_char = character
 
-    def reset(self) -> None:  # pragma: no cover
+    def reset(self) -> None:  # Abstract
         self._punctuation_count = 0
         self._character_count = 0
         self._symbol_count = 0
@@ -123,7 +125,7 @@ class TooManyAccentuatedPlugin(MessDetectorPlugin):
         if is_accentuated(character):
             self._accentuated_count += 1
 
-    def reset(self) -> None:  # pragma: no cover
+    def reset(self) -> None:  # Abstract
         self._character_count = 0
         self._accentuated_count = 0
 
@@ -149,7 +151,7 @@ class UnprintablePlugin(MessDetectorPlugin):
             self._unprintable_count += 1
         self._character_count += 1
 
-    def reset(self) -> None:  # pragma: no cover
+    def reset(self) -> None:  # Abstract
         self._unprintable_count = 0
 
     @property
@@ -165,7 +167,7 @@ class SuspiciousDuplicateAccentPlugin(MessDetectorPlugin):
         self._successive_count: int = 0
         self._character_count: int = 0
 
-        self._last_latin_character: Optional[str] = None
+        self._last_latin_character: str | None = None
 
     def eligible(self, character: str) -> bool:
         return character.isalpha() and is_latin(character)
@@ -184,7 +186,7 @@ class SuspiciousDuplicateAccentPlugin(MessDetectorPlugin):
             self._successive_count += 1
         self._last_latin_character = character
 
-    def reset(self) -> None:  # pragma: no cover
+    def reset(self) -> None:  # Abstract
         self._successive_count = 0
         self._character_count = 0
         self._last_latin_character = None
@@ -201,7 +203,7 @@ class SuspiciousRange(MessDetectorPlugin):
     def __init__(self) -> None:
         self._suspicious_successive_range_count: int = 0
         self._character_count: int = 0
-        self._last_printable_seen: Optional[str] = None
+        self._last_printable_seen: str | None = None
 
     def eligible(self, character: str) -> bool:
         return character.isprintable()
@@ -221,15 +223,15 @@
             self._last_printable_seen = character
             return
 
-        unicode_range_a: Optional[str] = unicode_range(self._last_printable_seen)
-        unicode_range_b: Optional[str] = unicode_range(character)
+        unicode_range_a: str | None = unicode_range(self._last_printable_seen)
+        unicode_range_b: str | None = unicode_range(character)
 
         if is_suspiciously_successive_range(unicode_range_a, unicode_range_b):
             self._suspicious_successive_range_count += 1
 
         self._last_printable_seen = character
 
-    def reset(self) -> None:  # pragma: no cover
+    def reset(self) -> None:  # Abstract
         self._character_count = 0
         self._suspicious_successive_range_count = 0
         self._last_printable_seen = None
@@ -346,7 +348,7 @@ class SuperWeirdWordPlugin(MessDetectorPlugin):
             self._is_current_word_bad = True
         self._buffer += character
 
-    def reset(self) -> None:  # pragma: no cover
+    def reset(self) -> None:  # Abstract
         self._buffer = ""
         self._is_current_word_bad = False
         self._foreign_long_watch = False
@@ -364,35 +366,39 @@ class SuperWeirdWordPlugin(MessDetectorPlugin):
         return self._bad_character_count / self._character_count
 
 
-class CjkInvalidStopPlugin(MessDetectorPlugin):
+class CjkUncommonPlugin(MessDetectorPlugin):
     """
-    GB(Chinese) based encoding often render the stop incorrectly when the content does not fit and
-    can be easily detected. Searching for the overuse of '丅' and '丄'.
+    Detect messy CJK text that probably means nothing.
     """
 
     def __init__(self) -> None:
-        self._wrong_stop_count: int = 0
-        self._cjk_character_count: int = 0
+        self._character_count: int = 0
+        self._uncommon_count: int = 0
 
     def eligible(self, character: str) -> bool:
-        return True
+        return is_cjk(character)
 
     def feed(self, character: str) -> None:
-        if character in {"丅", "丄"}:
-            self._wrong_stop_count += 1
-            return
-        if is_cjk(character):
-            self._cjk_character_count += 1
+        self._character_count += 1
 
-    def reset(self) -> None:  # pragma: no cover
-        self._wrong_stop_count = 0
-        self._cjk_character_count = 0
+        if is_cjk_uncommon(character):
+            self._uncommon_count += 1
+            return
+
+    def reset(self) -> None:  # Abstract
+        self._character_count = 0
+        self._uncommon_count = 0
 
     @property
     def ratio(self) -> float:
-        if self._cjk_character_count < 16:
+        if self._character_count < 8:
             return 0.0
-        return self._wrong_stop_count / self._cjk_character_count
+
+        uncommon_form_usage: float = self._uncommon_count / self._character_count
+
+        # we can be pretty sure it's garbage when uncommon characters are widely
+        # used. otherwise it could just be traditional chinese for example.
+        return uncommon_form_usage / 10 if uncommon_form_usage > 0.5 else 0.0
 
 
 class ArchaicUpperLowerPlugin(MessDetectorPlugin):
@@ -406,7 +412,7 @@ class ArchaicUpperLowerPlugin(MessDetectorPlugin):
 
         self._character_count: int = 0
 
-        self._last_alpha_seen: Optional[str] = None
+        self._last_alpha_seen: str | None = None
         self._current_ascii_only: bool = True
 
     def eligible(self, character: str) -> bool:
@@ -454,7 +460,7 @@ class ArchaicUpperLowerPlugin(MessDetectorPlugin):
         self._character_count_since_last_sep += 1
         self._last_alpha_seen = character
 
-    def reset(self) -> None:  # pragma: no cover
+    def reset(self) -> None:  # Abstract
         self._character_count = 0
         self._character_count_since_last_sep = 0
         self._successive_upper_lower_count = 0
@@ -476,7 +482,7 @@ class ArabicIsolatedFormPlugin(MessDetectorPlugin):
         self._character_count: int = 0
         self._isolated_form_count: int = 0
 
-    def reset(self) -> None:  # pragma: no cover
+    def reset(self) -> None:  # Abstract
         self._character_count = 0
         self._isolated_form_count = 0
 
@@ -501,7 +507,7 @@ class ArabicIsolatedFormPlugin(MessDetectorPlugin):
 
 @lru_cache(maxsize=1024)
 def is_suspiciously_successive_range(
-    unicode_range_a: Optional[str], unicode_range_b: Optional[str]
+    unicode_range_a: str | None, unicode_range_b: str | None
 ) -> bool:
     """
     Determine if two Unicode range seen next to each other can be considered as suspicious.
@@ -525,9 +531,10 @@ def is_suspiciously_successive_range(
     ):
         return False
 
-    keywords_range_a, keywords_range_b = unicode_range_a.split(
-        " "
-    ), unicode_range_b.split(" ")
+    keywords_range_a, keywords_range_b = (
+        unicode_range_a.split(" "),
+        unicode_range_b.split(" "),
+    )
 
     for el in keywords_range_a:
         if el in UNICODE_SECONDARY_RANGE_KEYWORD:
@@ -580,7 +587,7 @@ def mess_ratio(
     Compute a mess ratio given a decoded bytes sequence. The maximum threshold does stop the computation earlier.
     """
 
-    detectors: List[MessDetectorPlugin] = [
+    detectors: list[MessDetectorPlugin] = [
         md_class() for md_class in MessDetectorPlugin.__subclasses__()
     ]
 
@@ -622,7 +629,7 @@ def mess_ratio(
         logger.log(TRACE, f"Starting with: {decoded_sequence[:16]}")
         logger.log(TRACE, f"Ending with: {decoded_sequence[-16::]}")
 
-        for dt in detectors:  # pragma: nocover
+        for dt in detectors:
             logger.log(TRACE, f"{dt.__class__}: {dt.ratio}")
 
     return round(mean_mess_ratio, 3)
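Note: CjkUncommonPlugin replaces the old wrong-stop heuristic ('丅'/'丄' counting) with a broader one: of the CJK characters fed to it, how many are absent from COMMON_CJK_CHARACTERS. Working the new ratio through with made-up counts:

    # Made-up counts to trace CjkUncommonPlugin.ratio.
    character_count = 10  # CJK characters seen (>= 8, so the plugin engages)
    uncommon_count = 6    # of those, not in COMMON_CJK_CHARACTERS

    uncommon_form_usage = uncommon_count / character_count  # 0.6
    # Only a majority of uncommon forms counts as garbage; the result is
    # divided by 10, presumably to keep this plugin's contribution small
    # relative to the other detectors.
    ratio = uncommon_form_usage / 10 if uncommon_form_usage > 0.5 else 0.0
    print(ratio)  # 0.06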
charset_normalizer/models.py

@@ -1,8 +1,10 @@
+from __future__ import annotations
+
 from encodings.aliases import aliases
 from hashlib import sha256
 from json import dumps
 from re import sub
-from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
+from typing import Any, Iterator, List, Tuple
 
 from .constant import RE_POSSIBLE_ENCODING_INDICATION, TOO_BIG_SEQUENCE
 from .utils import iana_name, is_multi_byte_encoding, unicode_range
@@ -15,9 +17,9 @@ class CharsetMatch:
         guessed_encoding: str,
         mean_mess_ratio: float,
         has_sig_or_bom: bool,
-        languages: "CoherenceMatches",
-        decoded_payload: Optional[str] = None,
-        preemptive_declaration: Optional[str] = None,
+        languages: CoherenceMatches,
+        decoded_payload: str | None = None,
+        preemptive_declaration: str | None = None,
     ):
         self._payload: bytes = payload
 
@@ -25,17 +27,17 @@ class CharsetMatch:
         self._mean_mess_ratio: float = mean_mess_ratio
         self._languages: CoherenceMatches = languages
         self._has_sig_or_bom: bool = has_sig_or_bom
-        self._unicode_ranges: Optional[List[str]] = None
+        self._unicode_ranges: list[str] | None = None
 
-        self._leaves: List[CharsetMatch] = []
+        self._leaves: list[CharsetMatch] = []
         self._mean_coherence_ratio: float = 0.0
 
-        self._output_payload: Optional[bytes] = None
-        self._output_encoding: Optional[str] = None
+        self._output_payload: bytes | None = None
+        self._output_encoding: str | None = None
 
-        self._string: Optional[str] = decoded_payload
+        self._string: str | None = decoded_payload
 
-        self._preemptive_declaration: Optional[str] = preemptive_declaration
+        self._preemptive_declaration: str | None = preemptive_declaration
 
     def __eq__(self, other: object) -> bool:
         if not isinstance(other, CharsetMatch):
@@ -77,9 +79,9 @@ class CharsetMatch:
         return self._string
 
     def __repr__(self) -> str:
-        return "<CharsetMatch '{}' bytes({})>".format(self.encoding, self.fingerprint)
+        return f"<CharsetMatch '{self.encoding}' bytes({self.fingerprint})>"
 
-    def add_submatch(self, other: "CharsetMatch") -> None:
+    def add_submatch(self, other: CharsetMatch) -> None:
         if not isinstance(other, CharsetMatch) or other == self:
             raise ValueError(
                 "Unable to add instance <{}> as a submatch of a CharsetMatch".format(
@@ -95,11 +97,11 @@ class CharsetMatch:
         return self._encoding
 
     @property
-    def encoding_aliases(self) -> List[str]:
+    def encoding_aliases(self) -> list[str]:
         """
         Encoding name are known by many name, using this could help when searching for IBM855 when it's listed as CP855.
         """
-        also_known_as: List[str] = []
+        also_known_as: list[str] = []
         for u, p in aliases.items():
             if self.encoding == u:
                 also_known_as.append(p)
@@ -116,7 +118,7 @@ class CharsetMatch:
         return self._has_sig_or_bom
 
     @property
-    def languages(self) -> List[str]:
+    def languages(self) -> list[str]:
         """
         Return the complete list of possible languages found in decoded sequence.
         Usually not really useful. Returned list may be empty even if 'language' property return something != 'Unknown'.
@@ -177,7 +179,7 @@ class CharsetMatch:
         return self._payload
 
     @property
-    def submatch(self) -> List["CharsetMatch"]:
+    def submatch(self) -> list[CharsetMatch]:
         return self._leaves
 
     @property
@@ -185,19 +187,17 @@ class CharsetMatch:
         return len(self._leaves) > 0
 
     @property
-    def alphabets(self) -> List[str]:
+    def alphabets(self) -> list[str]:
         if self._unicode_ranges is not None:
             return self._unicode_ranges
         # list detected ranges
-        detected_ranges: List[Optional[str]] = [
-            unicode_range(char) for char in str(self)
-        ]
+        detected_ranges: list[str | None] = [unicode_range(char) for char in str(self)]
         # filter and sort
         self._unicode_ranges = sorted(list({r for r in detected_ranges if r}))
         return self._unicode_ranges
 
     @property
-    def could_be_from_charset(self) -> List[str]:
+    def could_be_from_charset(self) -> list[str]:
         """
         The complete list of encoding that output the exact SAME str result and therefore could be the originating
         encoding.
@@ -221,10 +221,11 @@ class CharsetMatch:
             patched_header = sub(
                 RE_POSSIBLE_ENCODING_INDICATION,
                 lambda m: m.string[m.span()[0] : m.span()[1]].replace(
-                    m.groups()[0], iana_name(self._output_encoding)  # type: ignore[arg-type]
+                    m.groups()[0],
+                    iana_name(self._output_encoding).replace("_", "-"),  # type: ignore[arg-type]
                 ),
                 decoded_string[:8192],
-                1,
+                count=1,
             )
 
             decoded_string = patched_header + decoded_string[8192:]
@@ -247,13 +248,13 @@ class CharsetMatches:
     Act like a list(iterable) but does not implements all related methods.
     """
 
-    def __init__(self, results: Optional[List[CharsetMatch]] = None):
-        self._results: List[CharsetMatch] = sorted(results) if results else []
+    def __init__(self, results: list[CharsetMatch] | None = None):
+        self._results: list[CharsetMatch] = sorted(results) if results else []
 
     def __iter__(self) -> Iterator[CharsetMatch]:
         yield from self._results
 
-    def __getitem__(self, item: Union[int, str]) -> CharsetMatch:
+    def __getitem__(self, item: int | str) -> CharsetMatch:
         """
         Retrieve a single item either by its position or encoding name (alias may be used here).
         Raise KeyError upon invalid index or encoding not present in results.
@@ -293,7 +294,7 @@ class CharsetMatches:
             self._results.append(item)
             self._results = sorted(self._results)
 
-    def best(self) -> Optional["CharsetMatch"]:
+    def best(self) -> CharsetMatch | None:
         """
         Simply return the first match. Strict equivalent to matches[0].
         """
@@ -301,7 +302,7 @@ class CharsetMatches:
             return None
         return self._results[0]
 
-    def first(self) -> Optional["CharsetMatch"]:
+    def first(self) -> CharsetMatch | None:
         """
         Redundant method, call the method best(). Kept for BC reasons.
         """
@@ -316,31 +317,31 @@ class CliDetectionResult:
     def __init__(
         self,
         path: str,
-        encoding: Optional[str],
-        encoding_aliases: List[str],
-        alternative_encodings: List[str],
+        encoding: str | None,
+        encoding_aliases: list[str],
+        alternative_encodings: list[str],
         language: str,
-        alphabets: List[str],
+        alphabets: list[str],
         has_sig_or_bom: bool,
         chaos: float,
         coherence: float,
-        unicode_path: Optional[str],
+        unicode_path: str | None,
         is_preferred: bool,
     ):
         self.path: str = path
-        self.unicode_path: Optional[str] = unicode_path
-        self.encoding: Optional[str] = encoding
-        self.encoding_aliases: List[str] = encoding_aliases
-        self.alternative_encodings: List[str] = alternative_encodings
+        self.unicode_path: str | None = unicode_path
+        self.encoding: str | None = encoding
+        self.encoding_aliases: list[str] = encoding_aliases
+        self.alternative_encodings: list[str] = alternative_encodings
         self.language: str = language
-        self.alphabets: List[str] = alphabets
+        self.alphabets: list[str] = alphabets
         self.has_sig_or_bom: bool = has_sig_or_bom
         self.chaos: float = chaos
         self.coherence: float = coherence
         self.is_preferred: bool = is_preferred
 
     @property
-    def __dict__(self) -> Dict[str, Any]:  # type: ignore
+    def __dict__(self) -> dict[str, Any]:  # type: ignore
         return {
             "path": self.path,
             "encoding": self.encoding,
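Note: in CharsetMatch.output(), the re.sub call now passes count=1 by keyword and normalizes the declared charset name with replace("_", "-"). The keyword form matters because passing count positionally to re.sub is deprecated in recent CPython releases. A minimal check of the behaviour (pattern and strings are made up):

    import re

    header = 'charset="latin_1" charset="latin_1"'

    # count=1 by keyword: only the first declaration is patched.
    patched = re.sub(r'charset="([\w_-]+)"', 'charset="iso8859-1"', header, count=1)
    print(patched)  # charset="iso8859-1" charset="latin_1"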
charset_normalizer/utils.py

@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import importlib
 import logging
 import unicodedata
@@ -5,9 +7,11 @@ from codecs import IncrementalDecoder
 from encodings.aliases import aliases
 from functools import lru_cache
 from re import findall
-from typing import Generator, List, Optional, Set, Tuple, Union
+from typing import Generator
 
-from _multibytecodec import MultibyteIncrementalDecoder
+from _multibytecodec import (  # type: ignore[import-not-found,import]
+    MultibyteIncrementalDecoder,
+)
 
 from .constant import (
     ENCODING_MARKS,
@@ -16,6 +20,7 @@ from .constant import (
     UNICODE_RANGES_COMBINED,
     UNICODE_SECONDARY_RANGE_KEYWORD,
     UTF8_MAXIMAL_ALLOCATION,
+    COMMON_CJK_CHARACTERS,
 )
 
 
@@ -23,7 +28,7 @@ from .constant import (
 def is_accentuated(character: str) -> bool:
     try:
         description: str = unicodedata.name(character)
-    except ValueError:
+    except ValueError:  # Defensive: unicode database outdated?
         return False
     return (
         "WITH GRAVE" in description
@@ -43,13 +48,13 @@ def remove_accent(character: str) -> str:
     if not decomposed:
         return character
 
-    codes: List[str] = decomposed.split(" ")
+    codes: list[str] = decomposed.split(" ")
 
     return chr(int(codes[0], 16))
 
 
 @lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
-def unicode_range(character: str) -> Optional[str]:
+def unicode_range(character: str) -> str | None:
     """
     Retrieve the Unicode range official name from a single character.
     """
@@ -66,7 +71,7 @@ def unicode_range(character: str) -> Optional[str]:
 def is_latin(character: str) -> bool:
     try:
         description: str = unicodedata.name(character)
-    except ValueError:
+    except ValueError:  # Defensive: unicode database outdated?
         return False
     return "LATIN" in description
 
@@ -78,7 +83,7 @@ def is_punctuation(character: str) -> bool:
     if "P" in character_category:
         return True
 
-    character_range: Optional[str] = unicode_range(character)
+    character_range: str | None = unicode_range(character)
 
     if character_range is None:
         return False
@@ -93,7 +98,7 @@ def is_symbol(character: str) -> bool:
     if "S" in character_category or "N" in character_category:
         return True
 
-    character_range: Optional[str] = unicode_range(character)
+    character_range: str | None = unicode_range(character)
 
     if character_range is None:
         return False
@@ -103,7 +108,7 @@ def is_symbol(character: str) -> bool:
 
 @lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
 def is_emoticon(character: str) -> bool:
-    character_range: Optional[str] = unicode_range(character)
+    character_range: str | None = unicode_range(character)
 
     if character_range is None:
         return False
@@ -130,7 +135,7 @@ def is_case_variable(character: str) -> bool:
 def is_cjk(character: str) -> bool:
     try:
         character_name = unicodedata.name(character)
-    except ValueError:
+    except ValueError:  # Defensive: unicode database outdated?
         return False
 
     return "CJK" in character_name
@@ -140,7 +145,7 @@ def is_cjk(character: str) -> bool:
 def is_hiragana(character: str) -> bool:
     try:
         character_name = unicodedata.name(character)
-    except ValueError:
+    except ValueError:  # Defensive: unicode database outdated?
         return False
 
     return "HIRAGANA" in character_name
@@ -150,7 +155,7 @@ def is_hiragana(character: str) -> bool:
 def is_katakana(character: str) -> bool:
     try:
         character_name = unicodedata.name(character)
-    except ValueError:
+    except ValueError:  # Defensive: unicode database outdated?
         return False
 
     return "KATAKANA" in character_name
@@ -160,7 +165,7 @@ def is_katakana(character: str) -> bool:
 def is_hangul(character: str) -> bool:
     try:
         character_name = unicodedata.name(character)
-    except ValueError:
+    except ValueError:  # Defensive: unicode database outdated?
         return False
 
     return "HANGUL" in character_name
@@ -170,7 +175,7 @@ def is_hangul(character: str) -> bool:
 def is_thai(character: str) -> bool:
     try:
         character_name = unicodedata.name(character)
-    except ValueError:
+    except ValueError:  # Defensive: unicode database outdated?
         return False
 
     return "THAI" in character_name
@@ -180,7 +185,7 @@ def is_thai(character: str) -> bool:
 def is_arabic(character: str) -> bool:
     try:
         character_name = unicodedata.name(character)
-    except ValueError:
+    except ValueError:  # Defensive: unicode database outdated?
         return False
 
     return "ARABIC" in character_name
@@ -190,12 +195,17 @@ def is_arabic(character: str) -> bool:
 def is_arabic_isolated_form(character: str) -> bool:
     try:
         character_name = unicodedata.name(character)
-    except ValueError:
+    except ValueError:  # Defensive: unicode database outdated?
         return False
 
     return "ARABIC" in character_name and "ISOLATED FORM" in character_name
 
 
+@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
+def is_cjk_uncommon(character: str) -> bool:
+    return character not in COMMON_CJK_CHARACTERS
+
+
 @lru_cache(maxsize=len(UNICODE_RANGES_COMBINED))
 def is_unicode_range_secondary(range_name: str) -> bool:
     return any(keyword in range_name for keyword in UNICODE_SECONDARY_RANGE_KEYWORD)
@@ -206,13 +216,13 @@ def is_unprintable(character: str) -> bool:
     return (
         character.isspace() is False  # includes \n \t \r \v
         and character.isprintable() is False
-        and character != "\x1A"  # Why? Its the ASCII substitute character.
+        and character != "\x1a"  # Why? Its the ASCII substitute character.
         and character != "\ufeff"  # bug discovered in Python,
         # Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space.
     )
 
 
-def any_specified_encoding(sequence: bytes, search_zone: int = 8192) -> Optional[str]:
+def any_specified_encoding(sequence: bytes, search_zone: int = 8192) -> str | None:
     """
     Extract using ASCII-only decoder any specified encoding in the first n-bytes.
     """
@@ -221,7 +231,7 @@ def any_specified_encoding(sequence: bytes, search_zone: int = 8192) -> Optional
 
     seq_len: int = len(sequence)
 
-    results: List[str] = findall(
+    results: list[str] = findall(
         RE_POSSIBLE_ENCODING_INDICATION,
         sequence[: min(seq_len, search_zone)].decode("ascii", errors="ignore"),
     )
@@ -260,18 +270,18 @@ def is_multi_byte_encoding(name: str) -> bool:
         "utf_32_be",
         "utf_7",
     } or issubclass(
-        importlib.import_module("encodings.{}".format(name)).IncrementalDecoder,
+        importlib.import_module(f"encodings.{name}").IncrementalDecoder,
         MultibyteIncrementalDecoder,
     )
 
 
-def identify_sig_or_bom(sequence: bytes) -> Tuple[Optional[str], bytes]:
+def identify_sig_or_bom(sequence: bytes) -> tuple[str | None, bytes]:
     """
     Identify and extract SIG/BOM in given sequence.
     """
 
     for iana_encoding in ENCODING_MARKS:
-        marks: Union[bytes, List[bytes]] = ENCODING_MARKS[iana_encoding]
+        marks: bytes | list[bytes] = ENCODING_MARKS[iana_encoding]
 
         if isinstance(marks, bytes):
             marks = [marks]
@@ -288,6 +298,7 @@ def should_strip_sig_or_bom(iana_encoding: str) -> bool:
 
 
 def iana_name(cp_name: str, strict: bool = True) -> str:
+    """Returns the Python normalized encoding name (Not the IANA official name)."""
     cp_name = cp_name.lower().replace("-", "_")
 
     encoding_alias: str
@@ -298,35 +309,17 @@ def iana_name(cp_name: str, strict: bool = True) -> str:
         return encoding_iana
 
     if strict:
-        raise ValueError("Unable to retrieve IANA for '{}'".format(cp_name))
+        raise ValueError(f"Unable to retrieve IANA for '{cp_name}'")
 
     return cp_name
 
 
-def range_scan(decoded_sequence: str) -> List[str]:
-    ranges: Set[str] = set()
-
-    for character in decoded_sequence:
-        character_range: Optional[str] = unicode_range(character)
-
-        if character_range is None:
-            continue
-
-        ranges.add(character_range)
-
-    return list(ranges)
-
-
 def cp_similarity(iana_name_a: str, iana_name_b: str) -> float:
     if is_multi_byte_encoding(iana_name_a) or is_multi_byte_encoding(iana_name_b):
         return 0.0
 
-    decoder_a = importlib.import_module(
-        "encodings.{}".format(iana_name_a)
-    ).IncrementalDecoder
-    decoder_b = importlib.import_module(
-        "encodings.{}".format(iana_name_b)
-    ).IncrementalDecoder
+    decoder_a = importlib.import_module(f"encodings.{iana_name_a}").IncrementalDecoder
+    decoder_b = importlib.import_module(f"encodings.{iana_name_b}").IncrementalDecoder
 
     id_a: IncrementalDecoder = decoder_a(errors="ignore")
     id_b: IncrementalDecoder = decoder_b(errors="ignore")
@@ -374,7 +367,7 @@ def cut_sequence_chunks(
     strip_sig_or_bom: bool,
     sig_payload: bytes,
    is_multi_byte_decoder: bool,
-    decoded_payload: Optional[str] = None,
+    decoded_payload: str | None = None,
 ) -> Generator[str, None, None]:
     if decoded_payload and is_multi_byte_decoder is False:
         for i in offsets:
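Note: utils.py gains the cached is_cjk_uncommon() helper backing the new plugin and drops range_scan(), which no longer had callers. The unicode_range() helper that most predicates here lean on is unchanged; assuming the vendored package is importable, it behaves like this:

    from charset_normalizer.utils import is_cjk, unicode_range

    # unicode_range maps a character to its Unicode block name.
    print(unicode_range("A"))  # Basic Latin
    print(unicode_range("é"))  # Latin-1 Supplement
    print(is_cjk("的"))        # True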
charset_normalizer/version.py

@@ -2,5 +2,7 @@
 Expose version
 """
 
-__version__ = "3.4.0"
+from __future__ import annotations
+
+__version__ = "3.4.2"
 VERSION = __version__.split(".")
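Note: a quick way to confirm the vendored bump took effect at runtime:

    import charset_normalizer

    # Expected to print 3.4.2 after this commit.
    print(charset_normalizer.__version__)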