Bump requests from 2.28.2 to 2.31.0 (#2078)

* Bump requests from 2.28.2 to 2.31.0

Bumps [requests](https://github.com/psf/requests) from 2.28.2 to 2.31.0.
- [Release notes](https://github.com/psf/requests/releases)
- [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md)
- [Commits](https://github.com/psf/requests/compare/v2.28.2...v2.31.0)

---
updated-dependencies:
- dependency-name: requests
  dependency-type: direct:production
  update-type: version-update:semver-minor
...
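
A quick way to sanity-check the bump locally; a minimal sketch (the assertion message is illustrative, only the `2.31.0` pin comes from this commit):

```python
# Confirm the environment actually picked up the bumped dependency.
import requests

assert requests.__version__ == "2.31.0", f"unexpected requests {requests.__version__}"
```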

Signed-off-by: dependabot[bot] <support@github.com>

* Update requests==2.31.0

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com>

[skip ci]
dependabot[bot] 2023-08-23 21:40:02 -07:00, committed by GitHub
commit 6b6d43ef43
54 changed files with 4861 additions and 4958 deletions

charset_normalizer/__init__.py

@@ -21,7 +21,7 @@ at <https://github.com/Ousret/charset_normalizer>.
 """
 import logging
 
-from .api import from_bytes, from_fp, from_path
+from .api import from_bytes, from_fp, from_path, is_binary
 from .legacy import detect
 from .models import CharsetMatch, CharsetMatches
 from .utils import set_logging_handler
@@ -31,6 +31,7 @@ __all__ = (
     "from_fp",
     "from_path",
     "from_bytes",
+    "is_binary",
     "detect",
     "CharsetMatch",
     "CharsetMatches",

charset_normalizer/api.py

@@ -1,6 +1,6 @@
 import logging
 from os import PathLike
-from typing import Any, BinaryIO, List, Optional, Set
+from typing import BinaryIO, List, Optional, Set, Union
 
 from .cd import (
     coherence_ratio,
@@ -31,7 +31,7 @@ explain_handler.setFormatter(
 def from_bytes(
-    sequences: bytes,
+    sequences: Union[bytes, bytearray],
     steps: int = 5,
     chunk_size: int = 512,
     threshold: float = 0.2,
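
The widened `sequences` annotation admits a mutable `bytearray` (e.g. a reused read buffer) without a `bytes(...)` copy first. A short sketch (the sample text is illustrative):

```python
from charset_normalizer import from_bytes

# A mutable buffer, as filled by e.g. BufferedReader.readinto or socket.recv_into.
buf = bytearray("naïveté".encode("cp1252"))
best = from_bytes(buf).best()
if best is not None:
    print(best.encoding, str(best))
```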
@@ -40,6 +40,7 @@ def from_bytes(
     preemptive_behaviour: bool = True,
     explain: bool = False,
     language_threshold: float = 0.1,
+    enable_fallback: bool = True,
 ) -> CharsetMatches:
     """
     Given a raw bytes sequence, return the best possibles charset usable to render str objects.
@@ -361,7 +362,8 @@ def from_bytes(
             )
         # Preparing those fallbacks in case we got nothing.
         if (
-            encoding_iana in ["ascii", "utf_8", specified_encoding]
+            enable_fallback
+            and encoding_iana in ["ascii", "utf_8", specified_encoding]
             and not lazy_str_hard_failure
         ):
             fallback_entry = CharsetMatch(
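
The new `enable_fallback` gate is easiest to observe by toggling it: when disabled, the ascii/utf_8 fallback entries are never appended, so messy-but-decodable input can yield an empty result set. A sketch (the payload is illustrative):

```python
from charset_normalizer import from_bytes

noisy = b";;;###@@@|||~~~^^^" * 8  # decodes as ASCII, but looks like line noise

lenient = from_bytes(noisy)                        # enable_fallback=True (default)
strict = from_bytes(noisy, enable_fallback=False)  # no ascii/utf_8 fallback entries

# If only the fallback would have matched, the strict call can come back empty.
print(len(lenient), len(strict))
```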
@@ -507,6 +509,7 @@ def from_fp(
     preemptive_behaviour: bool = True,
     explain: bool = False,
     language_threshold: float = 0.1,
+    enable_fallback: bool = True,
 ) -> CharsetMatches:
     """
     Same thing than the function from_bytes but using a file pointer that is already ready.
@@ -522,11 +525,12 @@ def from_fp(
         preemptive_behaviour,
         explain,
         language_threshold,
+        enable_fallback,
     )
 
 
 def from_path(
-    path: "PathLike[Any]",
+    path: Union[str, bytes, PathLike],  # type: ignore[type-arg]
     steps: int = 5,
     chunk_size: int = 512,
     threshold: float = 0.20,
@@ -535,6 +539,7 @@ def from_path(
     preemptive_behaviour: bool = True,
     explain: bool = False,
     language_threshold: float = 0.1,
+    enable_fallback: bool = True,
 ) -> CharsetMatches:
     """
     Same thing than the function from_bytes but with one extra step. Opening and reading given file path in binary mode.
@@ -551,4 +556,71 @@ def from_path(
         preemptive_behaviour,
         explain,
         language_threshold,
+        enable_fallback,
     )
+
+
+def is_binary(
+    fp_or_path_or_payload: Union[PathLike, str, BinaryIO, bytes],  # type: ignore[type-arg]
+    steps: int = 5,
+    chunk_size: int = 512,
+    threshold: float = 0.20,
+    cp_isolation: Optional[List[str]] = None,
+    cp_exclusion: Optional[List[str]] = None,
+    preemptive_behaviour: bool = True,
+    explain: bool = False,
+    language_threshold: float = 0.1,
+    enable_fallback: bool = False,
+) -> bool:
+    """
+    Detect if the given input (file, bytes, or path) points to a binary file. aka. not a string.
+    Based on the same main heuristic algorithms and default kwargs at the sole exception that fallbacks match
+    are disabled to be stricter around ASCII-compatible but unlikely to be a string.
+    """
+    if isinstance(fp_or_path_or_payload, (str, PathLike)):
+        guesses = from_path(
+            fp_or_path_or_payload,
+            steps=steps,
+            chunk_size=chunk_size,
+            threshold=threshold,
+            cp_isolation=cp_isolation,
+            cp_exclusion=cp_exclusion,
+            preemptive_behaviour=preemptive_behaviour,
+            explain=explain,
+            language_threshold=language_threshold,
+            enable_fallback=enable_fallback,
+        )
+    elif isinstance(
+        fp_or_path_or_payload,
+        (
+            bytes,
+            bytearray,
+        ),
+    ):
+        guesses = from_bytes(
+            fp_or_path_or_payload,
+            steps=steps,
+            chunk_size=chunk_size,
+            threshold=threshold,
+            cp_isolation=cp_isolation,
+            cp_exclusion=cp_exclusion,
+            preemptive_behaviour=preemptive_behaviour,
+            explain=explain,
+            language_threshold=language_threshold,
+            enable_fallback=enable_fallback,
+        )
+    else:
+        guesses = from_fp(
+            fp_or_path_or_payload,
+            steps=steps,
+            chunk_size=chunk_size,
+            threshold=threshold,
+            cp_isolation=cp_isolation,
+            cp_exclusion=cp_exclusion,
+            preemptive_behaviour=preemptive_behaviour,
+            explain=explain,
+            language_threshold=language_threshold,
+            enable_fallback=enable_fallback,
+        )
+
+    return not guesses
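
Since `is_binary` dispatches on input type, all three shapes run through the same detection pipeline. A usage sketch (the file names are illustrative):

```python
from charset_normalizer import is_binary

print(is_binary(b"\x89PNG\r\n\x1a\n"))   # raw payload, routed through from_bytes; likely True
print(is_binary("notes.txt"))            # str/PathLike, routed through from_path
with open("archive.zip", "rb") as fp:
    print(is_binary(fp))                 # file pointer, routed through from_fp
```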

charset_normalizer/md.py

@@ -294,14 +294,25 @@ class SuperWeirdWordPlugin(MessDetectorPlugin):
         if buffer_length >= 4:
             if self._buffer_accent_count / buffer_length > 0.34:
                 self._is_current_word_bad = True
-            # Word/Buffer ending with a upper case accentuated letter are so rare,
+            # Word/Buffer ending with an upper case accentuated letter are so rare,
             # that we will consider them all as suspicious. Same weight as foreign_long suspicious.
             if is_accentuated(self._buffer[-1]) and self._buffer[-1].isupper():
                 self._foreign_long_count += 1
                 self._is_current_word_bad = True
             if buffer_length >= 24 and self._foreign_long_watch:
-                self._foreign_long_count += 1
-                self._is_current_word_bad = True
+                camel_case_dst = [
+                    i
+                    for c, i in zip(self._buffer, range(0, buffer_length))
+                    if c.isupper()
+                ]
+                probable_camel_cased: bool = False
+
+                if camel_case_dst and (len(camel_case_dst) / buffer_length <= 0.3):
+                    probable_camel_cased = True
+
+                if not probable_camel_cased:
+                    self._foreign_long_count += 1
+                    self._is_current_word_bad = True
 
         if self._is_current_word_bad:
             self._bad_word_count += 1
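
The new branch exempts long words whose capitals are sparse, i.e. a camelCased identifier rather than shouting foreign text, from the foreign-long penalty. The ratio test can be restated standalone; a sketch using `enumerate` in place of the diff's `zip(...)` (the sample words are illustrative):

```python
def probably_camel_cased(word: str) -> bool:
    # Positions of upper case characters, as in the diff's camel_case_dst.
    upper_positions = [i for i, c in enumerate(word) if c.isupper()]
    # Some capitals, but few relative to length -> plausible camelCase.
    return bool(upper_positions) and len(upper_positions) / len(word) <= 0.3

print(probably_camel_cased("getCharsetMatchesFromByteSequence"))  # True
print(probably_camel_cased("WMDEBUGPROTOCOLHANDSHAKE"))           # False: all caps
```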

charset_normalizer/utils.py

@@ -120,12 +120,12 @@ def is_emoticon(character: str) -> bool:
 
 @lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
 def is_separator(character: str) -> bool:
-    if character.isspace() or character in {"｜", "+", ",", ";", "<", ">"}:
+    if character.isspace() or character in {"｜", "+", "<", ">"}:
         return True
 
     character_category: str = unicodedata.category(character)
 
-    return "Z" in character_category
+    return "Z" in character_category or character_category in {"Po", "Pd", "Pc"}
 
 
 @lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
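
The rewritten return folds the hard-coded `,` and `;` into a Unicode-category check: any "other", "dash", or "connector" punctuation (`Po`, `Pd`, `Pc`) now counts as a separator. A quick standalone check of the same predicate (the character list is illustrative):

```python
import unicodedata

for ch in [",", ";", "-", "_", " "]:
    cat = unicodedata.category(ch)
    is_sep = ch.isspace() or "Z" in cat or cat in {"Po", "Pd", "Pc"}
    print(repr(ch), cat, is_sep)  # ',' Po, ';' Po, '-' Pd, '_' Pc, ' ' Zs
```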

charset_normalizer/version.py

@@ -2,5 +2,5 @@
 Expose version
 """
 
-__version__ = "3.1.0"
+__version__ = "3.2.0"
 VERSION = __version__.split(".")