Bump requests from 2.28.2 to 2.31.0 (#2078)

* Bump requests from 2.28.2 to 2.31.0

Bumps [requests](https://github.com/psf/requests) from 2.28.2 to 2.31.0.
- [Release notes](https://github.com/psf/requests/releases)
- [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md)
- [Commits](https://github.com/psf/requests/compare/v2.28.2...v2.31.0)

---
updated-dependencies:
- dependency-name: requests
  dependency-type: direct:production
  update-type: version-update:semver-minor
...
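
A quick way to sanity-check the bump locally; a minimal sketch (the assertion message is illustrative, only the `2.31.0` pin comes from this commit):

```python
# Confirm the environment actually picked up the bumped dependency.
import requests

assert requests.__version__ == "2.31.0", f"unexpected requests {requests.__version__}"
```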

Signed-off-by: dependabot[bot] <support@github.com>

* Update requests==2.31.0

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com>

[skip ci]
dependabot[bot] 2023-08-23 21:40:02 -07:00, committed by GitHub
commit 6b6d43ef43
54 changed files with 4861 additions and 4958 deletions

charset_normalizer/__init__.py

@@ -21,7 +21,7 @@ at <https://github.com/Ousret/charset_normalizer>.
 """
 import logging
 
-from .api import from_bytes, from_fp, from_path
+from .api import from_bytes, from_fp, from_path, is_binary
 from .legacy import detect
 from .models import CharsetMatch, CharsetMatches
 from .utils import set_logging_handler
@@ -31,6 +31,7 @@ __all__ = (
     "from_fp",
     "from_path",
     "from_bytes",
+    "is_binary",
     "detect",
     "CharsetMatch",
     "CharsetMatches",

charset_normalizer/api.py

@@ -1,6 +1,6 @@
 import logging
 from os import PathLike
-from typing import Any, BinaryIO, List, Optional, Set
+from typing import BinaryIO, List, Optional, Set, Union
 
 from .cd import (
     coherence_ratio,
@@ -31,7 +31,7 @@ explain_handler.setFormatter(
 def from_bytes(
-    sequences: bytes,
+    sequences: Union[bytes, bytearray],
     steps: int = 5,
     chunk_size: int = 512,
     threshold: float = 0.2,
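
The widened `sequences` annotation admits a mutable `bytearray` (e.g. a reused read buffer) without a `bytes(...)` copy first. A short sketch (the sample text is illustrative):

```python
from charset_normalizer import from_bytes

# A mutable buffer, as filled by e.g. BufferedReader.readinto or socket.recv_into.
buf = bytearray("naïveté".encode("cp1252"))
best = from_bytes(buf).best()
if best is not None:
    print(best.encoding, str(best))
```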
@@ -40,6 +40,7 @@ def from_bytes(
     preemptive_behaviour: bool = True,
     explain: bool = False,
     language_threshold: float = 0.1,
+    enable_fallback: bool = True,
 ) -> CharsetMatches:
     """
     Given a raw bytes sequence, return the best possibles charset usable to render str objects.
@@ -361,7 +362,8 @@ def from_bytes(
             )
         # Preparing those fallbacks in case we got nothing.
         if (
-            encoding_iana in ["ascii", "utf_8", specified_encoding]
+            enable_fallback
+            and encoding_iana in ["ascii", "utf_8", specified_encoding]
             and not lazy_str_hard_failure
         ):
             fallback_entry = CharsetMatch(
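
The new `enable_fallback` gate is easiest to observe by toggling it: when disabled, the ascii/utf_8 fallback entries are never appended, so messy-but-decodable input can yield an empty result set. A sketch (the payload is illustrative):

```python
from charset_normalizer import from_bytes

noisy = b";;;###@@@|||~~~^^^" * 8  # decodes as ASCII, but looks like line noise

lenient = from_bytes(noisy)                        # enable_fallback=True (default)
strict = from_bytes(noisy, enable_fallback=False)  # no ascii/utf_8 fallback entries

# If only the fallback would have matched, the strict call can come back empty.
print(len(lenient), len(strict))
```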
@@ -507,6 +509,7 @@ def from_fp(
     preemptive_behaviour: bool = True,
     explain: bool = False,
     language_threshold: float = 0.1,
+    enable_fallback: bool = True,
 ) -> CharsetMatches:
     """
     Same thing than the function from_bytes but using a file pointer that is already ready.
@@ -522,11 +525,12 @@ def from_fp(
         preemptive_behaviour,
         explain,
         language_threshold,
+        enable_fallback,
     )
 
 
 def from_path(
-    path: "PathLike[Any]",
+    path: Union[str, bytes, PathLike],  # type: ignore[type-arg]
     steps: int = 5,
     chunk_size: int = 512,
     threshold: float = 0.20,
@@ -535,6 +539,7 @@ def from_path(
     preemptive_behaviour: bool = True,
     explain: bool = False,
     language_threshold: float = 0.1,
+    enable_fallback: bool = True,
 ) -> CharsetMatches:
     """
     Same thing than the function from_bytes but with one extra step. Opening and reading given file path in binary mode.
@@ -551,4 +556,71 @@ def from_path(
         preemptive_behaviour,
         explain,
         language_threshold,
+        enable_fallback,
     )
+
+
+def is_binary(
+    fp_or_path_or_payload: Union[PathLike, str, BinaryIO, bytes],  # type: ignore[type-arg]
+    steps: int = 5,
+    chunk_size: int = 512,
+    threshold: float = 0.20,
+    cp_isolation: Optional[List[str]] = None,
+    cp_exclusion: Optional[List[str]] = None,
+    preemptive_behaviour: bool = True,
+    explain: bool = False,
+    language_threshold: float = 0.1,
+    enable_fallback: bool = False,
+) -> bool:
+    """
+    Detect if the given input (file, bytes, or path) points to a binary file. aka. not a string.
+    Based on the same main heuristic algorithms and default kwargs at the sole exception that fallbacks match
+    are disabled to be stricter around ASCII-compatible but unlikely to be a string.
+    """
+    if isinstance(fp_or_path_or_payload, (str, PathLike)):
+        guesses = from_path(
+            fp_or_path_or_payload,
+            steps=steps,
+            chunk_size=chunk_size,
+            threshold=threshold,
+            cp_isolation=cp_isolation,
+            cp_exclusion=cp_exclusion,
+            preemptive_behaviour=preemptive_behaviour,
+            explain=explain,
+            language_threshold=language_threshold,
+            enable_fallback=enable_fallback,
+        )
+    elif isinstance(
+        fp_or_path_or_payload,
+        (
+            bytes,
+            bytearray,
+        ),
+    ):
+        guesses = from_bytes(
+            fp_or_path_or_payload,
+            steps=steps,
+            chunk_size=chunk_size,
+            threshold=threshold,
+            cp_isolation=cp_isolation,
+            cp_exclusion=cp_exclusion,
+            preemptive_behaviour=preemptive_behaviour,
+            explain=explain,
+            language_threshold=language_threshold,
+            enable_fallback=enable_fallback,
+        )
+    else:
+        guesses = from_fp(
+            fp_or_path_or_payload,
+            steps=steps,
+            chunk_size=chunk_size,
+            threshold=threshold,
+            cp_isolation=cp_isolation,
+            cp_exclusion=cp_exclusion,
+            preemptive_behaviour=preemptive_behaviour,
+            explain=explain,
+            language_threshold=language_threshold,
+            enable_fallback=enable_fallback,
+        )
+
+    return not guesses
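
Since `is_binary` dispatches on input type, all three shapes run through the same detection pipeline. A usage sketch (the file names are illustrative):

```python
from charset_normalizer import is_binary

print(is_binary(b"\x89PNG\r\n\x1a\n"))   # raw payload, routed through from_bytes; likely True
print(is_binary("notes.txt"))            # str/PathLike, routed through from_path
with open("archive.zip", "rb") as fp:
    print(is_binary(fp))                 # file pointer, routed through from_fp
```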

charset_normalizer/md.py

@@ -294,14 +294,25 @@ class SuperWeirdWordPlugin(MessDetectorPlugin):
         if buffer_length >= 4:
             if self._buffer_accent_count / buffer_length > 0.34:
                 self._is_current_word_bad = True
-            # Word/Buffer ending with a upper case accentuated letter are so rare,
+            # Word/Buffer ending with an upper case accentuated letter are so rare,
             # that we will consider them all as suspicious. Same weight as foreign_long suspicious.
             if is_accentuated(self._buffer[-1]) and self._buffer[-1].isupper():
                 self._foreign_long_count += 1
                 self._is_current_word_bad = True
             if buffer_length >= 24 and self._foreign_long_watch:
-                self._foreign_long_count += 1
-                self._is_current_word_bad = True
+                camel_case_dst = [
+                    i
+                    for c, i in zip(self._buffer, range(0, buffer_length))
+                    if c.isupper()
+                ]
+                probable_camel_cased: bool = False
+
+                if camel_case_dst and (len(camel_case_dst) / buffer_length <= 0.3):
+                    probable_camel_cased = True
+
+                if not probable_camel_cased:
+                    self._foreign_long_count += 1
+                    self._is_current_word_bad = True
 
         if self._is_current_word_bad:
             self._bad_word_count += 1
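
The new branch exempts long words whose capitals are sparse, i.e. a camelCased identifier rather than shouting foreign text, from the foreign-long penalty. The ratio test can be restated standalone; a sketch using `enumerate` in place of the diff's `zip(...)` (the sample words are illustrative):

```python
def probably_camel_cased(word: str) -> bool:
    # Positions of upper case characters, as in the diff's camel_case_dst.
    upper_positions = [i for i, c in enumerate(word) if c.isupper()]
    # Some capitals, but few relative to length -> plausible camelCase.
    return bool(upper_positions) and len(upper_positions) / len(word) <= 0.3

print(probably_camel_cased("getCharsetMatchesFromByteSequence"))  # True
print(probably_camel_cased("WMDEBUGPROTOCOLHANDSHAKE"))           # False: all caps
```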

charset_normalizer/utils.py

@@ -120,12 +120,12 @@ def is_emoticon(character: str) -> bool:
 
 @lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
 def is_separator(character: str) -> bool:
-    if character.isspace() or character in {"｜", "+", ",", ";", "<", ">"}:
+    if character.isspace() or character in {"｜", "+", "<", ">"}:
         return True
 
     character_category: str = unicodedata.category(character)
 
-    return "Z" in character_category
+    return "Z" in character_category or character_category in {"Po", "Pd", "Pc"}
 
 
 @lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
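
The rewritten return folds the hard-coded `,` and `;` into a Unicode-category check: any "other", "dash", or "connector" punctuation (`Po`, `Pd`, `Pc`) now counts as a separator. A quick standalone check of the same predicate (the character list is illustrative):

```python
import unicodedata

for ch in [",", ";", "-", "_", " "]:
    cat = unicodedata.category(ch)
    is_sep = ch.isspace() or "Z" in cat or cat in {"Po", "Pd", "Pc"}
    print(repr(ch), cat, is_sep)  # ',' Po, ';' Po, '-' Pd, '_' Pc, ' ' Zs
```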

charset_normalizer/version.py

@@ -2,5 +2,5 @@
 Expose version
 """
 
-__version__ = "3.1.0"
+__version__ = "3.2.0"
 VERSION = __version__.split(".")