Bump requests-oauthlib from 1.3.1 to 2.0.0 (#2293)

* Bump requests-oauthlib from 1.3.1 to 2.0.0

Bumps [requests-oauthlib](https://github.com/requests/requests-oauthlib) from 1.3.1 to 2.0.0.
- [Release notes](https://github.com/requests/requests-oauthlib/releases)
- [Changelog](https://github.com/requests/requests-oauthlib/blob/master/HISTORY.rst)
- [Commits](https://github.com/requests/requests-oauthlib/compare/v1.3.1...v2.0.0)

---
updated-dependencies:
- dependency-name: requests-oauthlib
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>

* Update requests-oauthlib==2.0.0
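
For reference, a minimal sketch (not part of this commit) to confirm the bundled version after the bump; requests_oauthlib exposes __version__ at the package top level:

    import requests_oauthlib

    # Expected to print "2.0.0" once this update is applied.
    print(requests_oauthlib.__version__)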

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com>

[skip ci]
commit 0d1d2a3e6b
dependabot[bot], 2024-03-30 15:28:02 -07:00, committed by GitHub
60 changed files with 2414 additions and 2291 deletions

File: charset_normalizer/__main__.py (new file)

@@ -0,0 +1,4 @@
+from .cli import cli_detect
+
+if __name__ == "__main__":
+    cli_detect()
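
Since the package now ships a __main__.py, the detector can be run as a module: `python -m charset_normalizer` dispatches straight to cli_detect().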

[File diff suppressed because it is too large]

File: charset_normalizer/cd.py

@@ -4,8 +4,13 @@ from collections import Counter
 from functools import lru_cache
 from typing import Counter as TypeCounter, Dict, List, Optional, Tuple
 
-from .assets import FREQUENCIES
-from .constant import KO_NAMES, LANGUAGE_SUPPORTED_COUNT, TOO_SMALL_SEQUENCE, ZH_NAMES
+from .constant import (
+    FREQUENCIES,
+    KO_NAMES,
+    LANGUAGE_SUPPORTED_COUNT,
+    TOO_SMALL_SEQUENCE,
+    ZH_NAMES,
+)
 from .md import is_suspiciously_successive_range
 from .models import CoherenceMatches
 from .utils import (

File: charset_normalizer/cli/__init__.py (new file)

@@ -0,0 +1,6 @@
+from .__main__ import cli_detect, query_yes_no
+
+__all__ = (
+    "cli_detect",
+    "query_yes_no",
+)

[File diff suppressed because it is too large]

File: charset_normalizer/md.py

@@ -9,7 +9,8 @@ from .constant import (
 )
 from .utils import (
     is_accentuated,
-    is_ascii,
+    is_arabic,
+    is_arabic_isolated_form,
     is_case_variable,
     is_cjk,
     is_emoticon,
@@ -128,8 +129,9 @@ class TooManyAccentuatedPlugin(MessDetectorPlugin):
     @property
     def ratio(self) -> float:
-        if self._character_count == 0 or self._character_count < 8:
+        if self._character_count < 8:
             return 0.0
 
         ratio_of_accentuation: float = self._accentuated_count / self._character_count
         return ratio_of_accentuation if ratio_of_accentuation >= 0.35 else 0.0
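
As a worked example with hypothetical counts: 4 accentuated characters in a 10-character stream gives 4/10 = 0.4, which clears the 0.35 threshold and is returned; any stream shorter than 8 characters now short-circuits to 0.0. The old `== 0` test was redundant, since any count below 8 already covers zero.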
@@ -234,16 +236,13 @@ class SuspiciousRange(MessDetectorPlugin):
     @property
     def ratio(self) -> float:
-        if self._character_count == 0:
+        if self._character_count <= 24:
             return 0.0
 
         ratio_of_suspicious_range_usage: float = (
            self._suspicious_successive_range_count * 2
         ) / self._character_count
 
-        if ratio_of_suspicious_range_usage < 0.1:
-            return 0.0
-
         return ratio_of_suspicious_range_usage
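
With the new guard, a hypothetical 25-character stream containing 2 suspicious range transitions scores (2 * 2) / 25 = 0.16 and is returned as-is; the old 0.1 floor is gone, and anything of 24 characters or fewer scores 0.0 outright.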
@@ -296,7 +295,11 @@ class SuperWeirdWordPlugin(MessDetectorPlugin):
                     self._is_current_word_bad = True
                 # Word/Buffer ending with an upper case accentuated letter are so rare,
                 # that we will consider them all as suspicious. Same weight as foreign_long suspicious.
-                if is_accentuated(self._buffer[-1]) and self._buffer[-1].isupper():
+                if (
+                    is_accentuated(self._buffer[-1])
+                    and self._buffer[-1].isupper()
+                    and all(_.isupper() for _ in self._buffer) is False
+                ):
                     self._foreign_long_count += 1
                     self._is_current_word_bad = True
                 if buffer_length >= 24 and self._foreign_long_watch:
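
A standalone sketch of the new exemption, using the library's own is_accentuated helper (the function name ends_suspiciously is hypothetical, for illustration only):

    from charset_normalizer.utils import is_accentuated

    def ends_suspiciously(buffer: str) -> bool:
        # New rule: a trailing upper-case accentuated letter is suspicious,
        # unless the whole buffer is upper-case (e.g. an all-caps word).
        return (
            is_accentuated(buffer[-1])
            and buffer[-1].isupper()
            and all(_.isupper() for _ in buffer) is False
        )

    print(ends_suspiciously("PÂTÉ"))  # False: all-caps words are now exempt
    print(ends_suspiciously("patÉ"))  # True: still counted as foreign_long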
@@ -419,7 +422,7 @@ class ArchaicUpperLowerPlugin(MessDetectorPlugin):
             return
 
-        if self._current_ascii_only is True and is_ascii(character) is False:
+        if self._current_ascii_only is True and character.isascii() is False:
             self._current_ascii_only = False
 
         if self._last_alpha_seen is not None:
@@ -455,6 +458,34 @@
         return self._successive_upper_lower_count_final / self._character_count
 
 
+class ArabicIsolatedFormPlugin(MessDetectorPlugin):
+    def __init__(self) -> None:
+        self._character_count: int = 0
+        self._isolated_form_count: int = 0
+
+    def reset(self) -> None:  # pragma: no cover
+        self._character_count = 0
+        self._isolated_form_count = 0
+
+    def eligible(self, character: str) -> bool:
+        return is_arabic(character)
+
+    def feed(self, character: str) -> None:
+        self._character_count += 1
+
+        if is_arabic_isolated_form(character):
+            self._isolated_form_count += 1
+
+    @property
+    def ratio(self) -> float:
+        if self._character_count < 8:
+            return 0.0
+
+        isolated_form_usage: float = self._isolated_form_count / self._character_count
+
+        return isolated_form_usage
+
+
 @lru_cache(maxsize=1024)
 def is_suspiciously_successive_range(
     unicode_range_a: Optional[str], unicode_range_b: Optional[str]
@@ -522,6 +553,8 @@ def is_suspiciously_successive_range(
             return False
         if "Forms" in unicode_range_a or "Forms" in unicode_range_b:
             return False
+        if unicode_range_a == "Basic Latin" or unicode_range_b == "Basic Latin":
+            return False
 
     return True
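
A minimal usage sketch of the new plugin (assumes ArabicIsolatedFormPlugin is importable from charset_normalizer.md, where it is defined above):

    from charset_normalizer.md import ArabicIsolatedFormPlugin

    plugin = ArabicIsolatedFormPlugin()
    for character in "\ufe8d" * 8:  # U+FE8D ARABIC LETTER ALEF ISOLATED FORM
        plugin.feed(character)

    print(plugin.ratio)  # 1.0: all 8 characters fed are isolated forms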

File: charset_normalizer/models.py

@@ -54,16 +54,19 @@ class CharsetMatch:
         # Below 1% difference --> Use Coherence
         if chaos_difference < 0.01 and coherence_difference > 0.02:
-            # When having a tough decision, use the result that decoded as many multi-byte as possible.
-            if chaos_difference == 0.0 and self.coherence == other.coherence:
-                return self.multi_byte_usage > other.multi_byte_usage
             return self.coherence > other.coherence
+        elif chaos_difference < 0.01 and coherence_difference <= 0.02:
+            # When having a difficult decision, use the result that decoded as many multi-byte as possible.
+            # preserve RAM usage!
+            if len(self._payload) >= TOO_BIG_SEQUENCE:
+                return self.chaos < other.chaos
+            return self.multi_byte_usage > other.multi_byte_usage
 
         return self.chaos < other.chaos
 
     @property
     def multi_byte_usage(self) -> float:
-        return 1.0 - len(str(self)) / len(self.raw)
+        return 1.0 - (len(str(self)) / len(self.raw))
 
     def __str__(self) -> str:
         # Lazy Str Loading
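
A worked example of multi_byte_usage with a hypothetical payload: a 5-byte UTF-8 sequence that decodes to 3 characters scores 1.0 - (3 / 5) = 0.4.

    payload = "été".encode("utf-8")    # 5 bytes: each "é" takes 2 bytes in UTF-8
    decoded = payload.decode("utf-8")  # 3 characters
    print(1.0 - (len(decoded) / len(payload)))  # 0.4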

File: charset_normalizer/utils.py

@@ -32,6 +32,8 @@ def is_accentuated(character: str) -> bool:
         or "WITH DIAERESIS" in description
         or "WITH CIRCUMFLEX" in description
         or "WITH TILDE" in description
+        or "WITH MACRON" in description
+        or "WITH RING ABOVE" in description
     )
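
The two new clauses match characters by their Unicode names, e.g.:

    import unicodedata

    print(unicodedata.name("ā"))  # LATIN SMALL LETTER A WITH MACRON
    print(unicodedata.name("å"))  # LATIN SMALL LETTER A WITH RING ABOVE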
@@ -69,15 +71,6 @@ def is_latin(character: str) -> bool:
     return "LATIN" in description
 
 
-@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
-def is_ascii(character: str) -> bool:
-    try:
-        character.encode("ascii")
-    except UnicodeEncodeError:
-        return False
-
-    return True
-
-
 @lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
 def is_punctuation(character: str) -> bool:
     character_category: str = unicodedata.category(character)
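
The deleted helper is equivalent, for single characters, to the built-in str.isascii() (Python 3.7+), which md.py now calls directly:

    # str.isascii() covers what is_ascii() did, without the encode round trip.
    print("a".isascii())  # True
    print("é".isascii())  # False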
@@ -105,7 +98,7 @@ def is_symbol(character: str) -> bool:
     if character_range is None:
         return False
 
-    return "Forms" in character_range
+    return "Forms" in character_range and character_category != "Lo"
 
 
 @lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
@@ -115,7 +108,7 @@ def is_emoticon(character: str) -> bool:
     if character_range is None:
         return False
 
-    return "Emoticons" in character_range
+    return "Emoticons" in character_range or "Pictographs" in character_range
 
 
 @lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
@@ -133,12 +126,6 @@ def is_case_variable(character: str) -> bool:
     return character.islower() != character.isupper()
 
 
-def is_private_use_only(character: str) -> bool:
-    character_category: str = unicodedata.category(character)
-
-    return character_category == "Co"
-
-
 @lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
 def is_cjk(character: str) -> bool:
     try:
@@ -189,6 +176,26 @@ def is_thai(character: str) -> bool:
     return "THAI" in character_name
 
 
+@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
+def is_arabic(character: str) -> bool:
+    try:
+        character_name = unicodedata.name(character)
+    except ValueError:
+        return False
+
+    return "ARABIC" in character_name
+
+
+@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
+def is_arabic_isolated_form(character: str) -> bool:
+    try:
+        character_name = unicodedata.name(character)
+    except ValueError:
+        return False
+
+    return "ARABIC" in character_name and "ISOLATED FORM" in character_name
+
+
 @lru_cache(maxsize=len(UNICODE_RANGES_COMBINED))
 def is_unicode_range_secondary(range_name: str) -> bool:
     return any(keyword in range_name for keyword in UNICODE_SECONDARY_RANGE_KEYWORD)
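
A quick check of the two new helpers (values verifiable with unicodedata):

    from charset_normalizer.utils import is_arabic, is_arabic_isolated_form

    print(is_arabic("\u0627"))                # True: ARABIC LETTER ALEF
    print(is_arabic_isolated_form("\u0627"))  # False: not an ISOLATED FORM
    print(is_arabic_isolated_form("\ufe8d"))  # True: ARABIC LETTER ALEF ISOLATED FORM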
@@ -205,7 +212,7 @@ def is_unprintable(character: str) -> bool:
     )
 
 
-def any_specified_encoding(sequence: bytes, search_zone: int = 4096) -> Optional[str]:
+def any_specified_encoding(sequence: bytes, search_zone: int = 8192) -> Optional[str]:
     """
     Extract using ASCII-only decoder any specified encoding in the first n-bytes.
     """

File: charset_normalizer/version.py

@@ -2,5 +2,5 @@
 Expose version
 """
 
-__version__ = "3.2.0"
+__version__ = "3.3.2"
 VERSION = __version__.split(".")