Bump idna from 3.7 to 3.10 (#2400)

* Bump idna from 3.7 to 3.10

Bumps [idna](https://github.com/kjd/idna) from 3.7 to 3.10.
- [Release notes](https://github.com/kjd/idna/releases)
- [Changelog](https://github.com/kjd/idna/blob/master/HISTORY.rst)
- [Commits](https://github.com/kjd/idna/compare/v3.7...v3.10)

---
updated-dependencies:
- dependency-name: idna
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Update idna==3.10

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com>

[skip ci]
Author: dependabot[bot]
Date: 2024-11-16 14:50:57 -08:00 (committed by GitHub)
Commit: bf07912711 (parent 48b1c7b522)
9 changed files with 12007 additions and 11875 deletions
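
For context, the quick check below (not part of this commit) confirms which idna version is importable after the bump; the sample values are the ones used in idna's own documentation:

import idna

print(idna.__version__)                         # expect "3.10" after this update
print(idna.encode("ドメイン.テスト"))             # b'xn--eckwd4c7c.xn--zckzah'
print(idna.decode("xn--eckwd4c7c.xn--zckzah"))  # ドメイン.テスト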

idna/__init__.py

@@ -1,4 +1,3 @@
-from .package_data import __version__
 from .core import (
     IDNABidiError,
     IDNAError,
@@ -20,8 +19,10 @@ from .core import (
     valid_string_length,
 )
 from .intranges import intranges_contain
+from .package_data import __version__
 __all__ = [
+    "__version__",
     "IDNABidiError",
     "IDNAError",
     "InvalidCodepoint",

idna/codec.py

@@ -1,49 +1,51 @@
-from .core import encode, decode, alabel, ulabel, IDNAError
 import codecs
 import re
-from typing import Any, Tuple, Optional
+from typing import Any, Optional, Tuple
+from .core import IDNAError, alabel, decode, encode, ulabel
-_unicode_dots_re = re.compile('[\u002e\u3002\uff0e\uff61]')
+_unicode_dots_re = re.compile("[\u002e\u3002\uff0e\uff61]")
 class Codec(codecs.Codec):
-    def encode(self, data: str, errors: str = 'strict') -> Tuple[bytes, int]:
-        if errors != 'strict':
-            raise IDNAError('Unsupported error handling \"{}\"'.format(errors))
+    def encode(self, data: str, errors: str = "strict") -> Tuple[bytes, int]:
+        if errors != "strict":
+            raise IDNAError('Unsupported error handling "{}"'.format(errors))
         if not data:
             return b"", 0
         return encode(data), len(data)
-    def decode(self, data: bytes, errors: str = 'strict') -> Tuple[str, int]:
-        if errors != 'strict':
-            raise IDNAError('Unsupported error handling \"{}\"'.format(errors))
+    def decode(self, data: bytes, errors: str = "strict") -> Tuple[str, int]:
+        if errors != "strict":
+            raise IDNAError('Unsupported error handling "{}"'.format(errors))
         if not data:
-            return '', 0
+            return "", 0
         return decode(data), len(data)
 class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
     def _buffer_encode(self, data: str, errors: str, final: bool) -> Tuple[bytes, int]:
-        if errors != 'strict':
-            raise IDNAError('Unsupported error handling \"{}\"'.format(errors))
+        if errors != "strict":
+            raise IDNAError('Unsupported error handling "{}"'.format(errors))
         if not data:
-            return b'', 0
+            return b"", 0
         labels = _unicode_dots_re.split(data)
-        trailing_dot = b''
+        trailing_dot = b""
         if labels:
             if not labels[-1]:
-                trailing_dot = b'.'
+                trailing_dot = b"."
                 del labels[-1]
             elif not final:
                 # Keep potentially unfinished label until the next call
                 del labels[-1]
                 if labels:
-                    trailing_dot = b'.'
+                    trailing_dot = b"."
         result = []
         size = 0
@@ -54,32 +56,33 @@ class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
             size += len(label)
         # Join with U+002E
-        result_bytes = b'.'.join(result) + trailing_dot
+        result_bytes = b".".join(result) + trailing_dot
         size += len(trailing_dot)
         return result_bytes, size
 class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
     def _buffer_decode(self, data: Any, errors: str, final: bool) -> Tuple[str, int]:
-        if errors != 'strict':
-            raise IDNAError('Unsupported error handling \"{}\"'.format(errors))
+        if errors != "strict":
+            raise IDNAError('Unsupported error handling "{}"'.format(errors))
         if not data:
-            return ('', 0)
+            return ("", 0)
         if not isinstance(data, str):
-            data = str(data, 'ascii')
+            data = str(data, "ascii")
         labels = _unicode_dots_re.split(data)
-        trailing_dot = ''
+        trailing_dot = ""
         if labels:
             if not labels[-1]:
-                trailing_dot = '.'
+                trailing_dot = "."
                 del labels[-1]
             elif not final:
                 # Keep potentially unfinished label until the next call
                 del labels[-1]
                 if labels:
-                    trailing_dot = '.'
+                    trailing_dot = "."
         result = []
         size = 0
@@ -89,7 +92,7 @@ class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
                 size += 1
             size += len(label)
-        result_str = '.'.join(result) + trailing_dot
+        result_str = ".".join(result) + trailing_dot
         size += len(trailing_dot)
         return (result_str, size)
@@ -103,7 +106,7 @@ class StreamReader(Codec, codecs.StreamReader):
 def search_function(name: str) -> Optional[codecs.CodecInfo]:
-    if name != 'idna2008':
+    if name != "idna2008":
         return None
     return codecs.CodecInfo(
         name=name,
@@ -115,4 +118,5 @@ def search_function(name: str) -> Optional[codecs.CodecInfo]:
         streamreader=StreamReader,
     )
 codecs.register(search_function)
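
Once idna.codec is imported, the search_function above makes the codec available under the name "idna2008"; a brief usage sketch (the expected byte strings are the examples from idna's documentation):

import idna.codec  # importing has the side effect of calling codecs.register(search_function)

print("домен.испытание".encode("idna2008"))                # b'xn--d1acufc.xn--80akhbyknj4f'
print(b"xn--d1acufc.xn--80akhbyknj4f".decode("idna2008"))  # домен.испытание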

idna/compat.py

@@ -1,13 +1,15 @@
-from .core import *
-from .codec import *
 from typing import Any, Union
+from .core import decode, encode
 def ToASCII(label: str) -> bytes:
     return encode(label)
 def ToUnicode(label: Union[bytes, bytearray]) -> str:
     return decode(label)
-def nameprep(s: Any) -> None:
-    raise NotImplementedError('IDNA 2008 does not utilise nameprep protocol')
+def nameprep(s: Any) -> None:
+    raise NotImplementedError("IDNA 2008 does not utilise nameprep protocol")

idna/core.py

@@ -1,31 +1,37 @@
-from . import idnadata
 import bisect
-import unicodedata
 import re
-from typing import Union, Optional
+import unicodedata
+from typing import Optional, Union
+from . import idnadata
 from .intranges import intranges_contain
 _virama_combining_class = 9
-_alabel_prefix = b'xn--'
-_unicode_dots_re = re.compile('[\u002e\u3002\uff0e\uff61]')
+_alabel_prefix = b"xn--"
+_unicode_dots_re = re.compile("[\u002e\u3002\uff0e\uff61]")
 class IDNAError(UnicodeError):
-    """ Base exception for all IDNA-encoding related problems """
+    """Base exception for all IDNA-encoding related problems"""
     pass
 class IDNABidiError(IDNAError):
-    """ Exception when bidirectional requirements are not satisfied """
+    """Exception when bidirectional requirements are not satisfied"""
     pass
 class InvalidCodepoint(IDNAError):
-    """ Exception when a disallowed or unallocated codepoint is used """
+    """Exception when a disallowed or unallocated codepoint is used"""
     pass
 class InvalidCodepointContext(IDNAError):
-    """ Exception when the codepoint is not valid in the context it is used """
+    """Exception when the codepoint is not valid in the context it is used"""
     pass
@@ -33,17 +39,20 @@ def _combining_class(cp: int) -> int:
     v = unicodedata.combining(chr(cp))
     if v == 0:
         if not unicodedata.name(chr(cp)):
-            raise ValueError('Unknown character in unicodedata')
+            raise ValueError("Unknown character in unicodedata")
     return v
 def _is_script(cp: str, script: str) -> bool:
     return intranges_contain(ord(cp), idnadata.scripts[script])
 def _punycode(s: str) -> bytes:
-    return s.encode('punycode')
+    return s.encode("punycode")
 def _unot(s: int) -> str:
-    return 'U+{:04X}'.format(s)
+    return "U+{:04X}".format(s)
 def valid_label_length(label: Union[bytes, str]) -> bool:
@@ -61,96 +70,106 @@ def valid_string_length(label: Union[bytes, str], trailing_dot: bool) -> bool:
 def check_bidi(label: str, check_ltr: bool = False) -> bool:
     # Bidi rules should only be applied if string contains RTL characters
     bidi_label = False
-    for (idx, cp) in enumerate(label, 1):
+    for idx, cp in enumerate(label, 1):
         direction = unicodedata.bidirectional(cp)
-        if direction == '':
+        if direction == "":
             # String likely comes from a newer version of Unicode
-            raise IDNABidiError('Unknown directionality in label {} at position {}'.format(repr(label), idx))
-        if direction in ['R', 'AL', 'AN']:
+            raise IDNABidiError("Unknown directionality in label {} at position {}".format(repr(label), idx))
+        if direction in ["R", "AL", "AN"]:
             bidi_label = True
     if not bidi_label and not check_ltr:
         return True
     # Bidi rule 1
     direction = unicodedata.bidirectional(label[0])
-    if direction in ['R', 'AL']:
+    if direction in ["R", "AL"]:
         rtl = True
-    elif direction == 'L':
+    elif direction == "L":
         rtl = False
     else:
-        raise IDNABidiError('First codepoint in label {} must be directionality L, R or AL'.format(repr(label)))
+        raise IDNABidiError("First codepoint in label {} must be directionality L, R or AL".format(repr(label)))
     valid_ending = False
-    number_type = None  # type: Optional[str]
-    for (idx, cp) in enumerate(label, 1):
+    number_type: Optional[str] = None
+    for idx, cp in enumerate(label, 1):
         direction = unicodedata.bidirectional(cp)
         if rtl:
             # Bidi rule 2
-            if not direction in ['R', 'AL', 'AN', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM']:
-                raise IDNABidiError('Invalid direction for codepoint at position {} in a right-to-left label'.format(idx))
+            if direction not in [
+                "R",
+                "AL",
+                "AN",
+                "EN",
+                "ES",
+                "CS",
+                "ET",
+                "ON",
+                "BN",
+                "NSM",
+            ]:
+                raise IDNABidiError("Invalid direction for codepoint at position {} in a right-to-left label".format(idx))
             # Bidi rule 3
-            if direction in ['R', 'AL', 'EN', 'AN']:
+            if direction in ["R", "AL", "EN", "AN"]:
                 valid_ending = True
-            elif direction != 'NSM':
+            elif direction != "NSM":
                 valid_ending = False
             # Bidi rule 4
-            if direction in ['AN', 'EN']:
+            if direction in ["AN", "EN"]:
                 if not number_type:
                     number_type = direction
                 else:
                     if number_type != direction:
-                        raise IDNABidiError('Can not mix numeral types in a right-to-left label')
+                        raise IDNABidiError("Can not mix numeral types in a right-to-left label")
         else:
             # Bidi rule 5
-            if not direction in ['L', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM']:
-                raise IDNABidiError('Invalid direction for codepoint at position {} in a left-to-right label'.format(idx))
+            if direction not in ["L", "EN", "ES", "CS", "ET", "ON", "BN", "NSM"]:
+                raise IDNABidiError("Invalid direction for codepoint at position {} in a left-to-right label".format(idx))
             # Bidi rule 6
-            if direction in ['L', 'EN']:
+            if direction in ["L", "EN"]:
                 valid_ending = True
-            elif direction != 'NSM':
+            elif direction != "NSM":
                 valid_ending = False
     if not valid_ending:
-        raise IDNABidiError('Label ends with illegal codepoint directionality')
+        raise IDNABidiError("Label ends with illegal codepoint directionality")
     return True
 def check_initial_combiner(label: str) -> bool:
-    if unicodedata.category(label[0])[0] == 'M':
-        raise IDNAError('Label begins with an illegal combining character')
+    if unicodedata.category(label[0])[0] == "M":
+        raise IDNAError("Label begins with an illegal combining character")
     return True
 def check_hyphen_ok(label: str) -> bool:
-    if label[2:4] == '--':
-        raise IDNAError('Label has disallowed hyphens in 3rd and 4th position')
-    if label[0] == '-' or label[-1] == '-':
-        raise IDNAError('Label must not start or end with a hyphen')
+    if label[2:4] == "--":
+        raise IDNAError("Label has disallowed hyphens in 3rd and 4th position")
+    if label[0] == "-" or label[-1] == "-":
+        raise IDNAError("Label must not start or end with a hyphen")
     return True
 def check_nfc(label: str) -> None:
-    if unicodedata.normalize('NFC', label) != label:
-        raise IDNAError('Label must be in Normalization Form C')
+    if unicodedata.normalize("NFC", label) != label:
+        raise IDNAError("Label must be in Normalization Form C")
 def valid_contextj(label: str, pos: int) -> bool:
     cp_value = ord(label[pos])
-    if cp_value == 0x200c:
+    if cp_value == 0x200C:
         if pos > 0:
             if _combining_class(ord(label[pos - 1])) == _virama_combining_class:
                 return True
         ok = False
-        for i in range(pos-1, -1, -1):
+        for i in range(pos - 1, -1, -1):
             joining_type = idnadata.joining_types.get(ord(label[i]))
-            if joining_type == ord('T'):
+            if joining_type == ord("T"):
                 continue
-            elif joining_type in [ord('L'), ord('D')]:
+            elif joining_type in [ord("L"), ord("D")]:
                 ok = True
                 break
             else:
@@ -160,63 +179,61 @@ def valid_contextj(label: str, pos: int) -> bool:
                 return False
         ok = False
-        for i in range(pos+1, len(label)):
+        for i in range(pos + 1, len(label)):
             joining_type = idnadata.joining_types.get(ord(label[i]))
-            if joining_type == ord('T'):
+            if joining_type == ord("T"):
                 continue
-            elif joining_type in [ord('R'), ord('D')]:
+            elif joining_type in [ord("R"), ord("D")]:
                 ok = True
                 break
             else:
                 break
         return ok
-    if cp_value == 0x200d:
+    if cp_value == 0x200D:
         if pos > 0:
             if _combining_class(ord(label[pos - 1])) == _virama_combining_class:
                 return True
         return False
     else:
         return False
 def valid_contexto(label: str, pos: int, exception: bool = False) -> bool:
     cp_value = ord(label[pos])
-    if cp_value == 0x00b7:
-        if 0 < pos < len(label)-1:
-            if ord(label[pos - 1]) == 0x006c and ord(label[pos + 1]) == 0x006c:
+    if cp_value == 0x00B7:
+        if 0 < pos < len(label) - 1:
+            if ord(label[pos - 1]) == 0x006C and ord(label[pos + 1]) == 0x006C:
                 return True
         return False
     elif cp_value == 0x0375:
-        if pos < len(label)-1 and len(label) > 1:
-            return _is_script(label[pos + 1], 'Greek')
+        if pos < len(label) - 1 and len(label) > 1:
+            return _is_script(label[pos + 1], "Greek")
         return False
-    elif cp_value == 0x05f3 or cp_value == 0x05f4:
+    elif cp_value == 0x05F3 or cp_value == 0x05F4:
         if pos > 0:
-            return _is_script(label[pos - 1], 'Hebrew')
+            return _is_script(label[pos - 1], "Hebrew")
         return False
-    elif cp_value == 0x30fb:
+    elif cp_value == 0x30FB:
         for cp in label:
-            if cp == '\u30fb':
+            if cp == "\u30fb":
                 continue
-            if _is_script(cp, 'Hiragana') or _is_script(cp, 'Katakana') or _is_script(cp, 'Han'):
+            if _is_script(cp, "Hiragana") or _is_script(cp, "Katakana") or _is_script(cp, "Han"):
                 return True
         return False
     elif 0x660 <= cp_value <= 0x669:
         for cp in label:
-            if 0x6f0 <= ord(cp) <= 0x06f9:
+            if 0x6F0 <= ord(cp) <= 0x06F9:
                 return False
         return True
-    elif 0x6f0 <= cp_value <= 0x6f9:
+    elif 0x6F0 <= cp_value <= 0x6F9:
         for cp in label:
             if 0x660 <= ord(cp) <= 0x0669:
                 return False
@@ -227,37 +244,49 @@ def valid_contexto(label: str, pos: int, exception: bool = False) -> bool:
 def check_label(label: Union[str, bytes, bytearray]) -> None:
     if isinstance(label, (bytes, bytearray)):
-        label = label.decode('utf-8')
+        label = label.decode("utf-8")
     if len(label) == 0:
-        raise IDNAError('Empty Label')
+        raise IDNAError("Empty Label")
     check_nfc(label)
     check_hyphen_ok(label)
     check_initial_combiner(label)
-    for (pos, cp) in enumerate(label):
+    for pos, cp in enumerate(label):
         cp_value = ord(cp)
-        if intranges_contain(cp_value, idnadata.codepoint_classes['PVALID']):
+        if intranges_contain(cp_value, idnadata.codepoint_classes["PVALID"]):
             continue
-        elif intranges_contain(cp_value, idnadata.codepoint_classes['CONTEXTJ']):
-            if not valid_contextj(label, pos):
-                raise InvalidCodepointContext('Joiner {} not allowed at position {} in {}'.format(
-                    _unot(cp_value), pos+1, repr(label)))
-        elif intranges_contain(cp_value, idnadata.codepoint_classes['CONTEXTO']):
+        elif intranges_contain(cp_value, idnadata.codepoint_classes["CONTEXTJ"]):
+            try:
+                if not valid_contextj(label, pos):
+                    raise InvalidCodepointContext(
+                        "Joiner {} not allowed at position {} in {}".format(_unot(cp_value), pos + 1, repr(label))
+                    )
+            except ValueError:
+                raise IDNAError(
+                    "Unknown codepoint adjacent to joiner {} at position {} in {}".format(
+                        _unot(cp_value), pos + 1, repr(label)
+                    )
+                )
+        elif intranges_contain(cp_value, idnadata.codepoint_classes["CONTEXTO"]):
             if not valid_contexto(label, pos):
-                raise InvalidCodepointContext('Codepoint {} not allowed at position {} in {}'.format(_unot(cp_value), pos+1, repr(label)))
+                raise InvalidCodepointContext(
+                    "Codepoint {} not allowed at position {} in {}".format(_unot(cp_value), pos + 1, repr(label))
+                )
         else:
-            raise InvalidCodepoint('Codepoint {} at position {} of {} not allowed'.format(_unot(cp_value), pos+1, repr(label)))
+            raise InvalidCodepoint(
+                "Codepoint {} at position {} of {} not allowed".format(_unot(cp_value), pos + 1, repr(label))
+            )
     check_bidi(label)
 def alabel(label: str) -> bytes:
     try:
-        label_bytes = label.encode('ascii')
+        label_bytes = label.encode("ascii")
         ulabel(label_bytes)
         if not valid_label_length(label_bytes):
-            raise IDNAError('Label too long')
+            raise IDNAError("Label too long")
         return label_bytes
     except UnicodeEncodeError:
         pass
@@ -266,7 +295,7 @@ def alabel(label: str) -> bytes:
     label_bytes = _alabel_prefix + _punycode(label)
     if not valid_label_length(label_bytes):
-        raise IDNAError('Label too long')
+        raise IDNAError("Label too long")
     return label_bytes
@@ -274,7 +303,7 @@ def alabel(label: str) -> bytes:
 def ulabel(label: Union[str, bytes, bytearray]) -> str:
     if not isinstance(label, (bytes, bytearray)):
         try:
-            label_bytes = label.encode('ascii')
+            label_bytes = label.encode("ascii")
         except UnicodeEncodeError:
             check_label(label)
             return label
@@ -283,19 +312,19 @@ def ulabel(label: Union[str, bytes, bytearray]) -> str:
     label_bytes = label_bytes.lower()
     if label_bytes.startswith(_alabel_prefix):
-        label_bytes = label_bytes[len(_alabel_prefix):]
+        label_bytes = label_bytes[len(_alabel_prefix) :]
         if not label_bytes:
-            raise IDNAError('Malformed A-label, no Punycode eligible content found')
-        if label_bytes.decode('ascii')[-1] == '-':
-            raise IDNAError('A-label must not end with a hyphen')
+            raise IDNAError("Malformed A-label, no Punycode eligible content found")
+        if label_bytes.decode("ascii")[-1] == "-":
+            raise IDNAError("A-label must not end with a hyphen")
     else:
         check_label(label_bytes)
-        return label_bytes.decode('ascii')
+        return label_bytes.decode("ascii")
     try:
-        label = label_bytes.decode('punycode')
+        label = label_bytes.decode("punycode")
     except UnicodeError:
-        raise IDNAError('Invalid A-label')
+        raise IDNAError("Invalid A-label")
     check_label(label)
     return label
@@ -303,52 +332,60 @@ def ulabel(label: Union[str, bytes, bytearray]) -> str:
 def uts46_remap(domain: str, std3_rules: bool = True, transitional: bool = False) -> str:
     """Re-map the characters in the string according to UTS46 processing."""
     from .uts46data import uts46data
-    output = ''
+    output = ""
     for pos, char in enumerate(domain):
         code_point = ord(char)
         try:
-            uts46row = uts46data[code_point if code_point < 256 else
-                bisect.bisect_left(uts46data, (code_point, 'Z')) - 1]
+            uts46row = uts46data[code_point if code_point < 256 else bisect.bisect_left(uts46data, (code_point, "Z")) - 1]
             status = uts46row[1]
-            replacement = None  # type: Optional[str]
+            replacement: Optional[str] = None
             if len(uts46row) == 3:
                 replacement = uts46row[2]
-            if (status == 'V' or
-                    (status == 'D' and not transitional) or
-                    (status == '3' and not std3_rules and replacement is None)):
+            if (
+                status == "V"
+                or (status == "D" and not transitional)
+                or (status == "3" and not std3_rules and replacement is None)
+            ):
                 output += char
-            elif replacement is not None and (status == 'M' or
-                    (status == '3' and not std3_rules) or
-                    (status == 'D' and transitional)):
+            elif replacement is not None and (
+                status == "M" or (status == "3" and not std3_rules) or (status == "D" and transitional)
+            ):
                 output += replacement
-            elif status != 'I':
+            elif status != "I":
                 raise IndexError()
         except IndexError:
             raise InvalidCodepoint(
-                'Codepoint {} not allowed at position {} in {}'.format(
-                    _unot(code_point), pos + 1, repr(domain)))
-    return unicodedata.normalize('NFC', output)
+                "Codepoint {} not allowed at position {} in {}".format(_unot(code_point), pos + 1, repr(domain))
+            )
+    return unicodedata.normalize("NFC", output)
-def encode(s: Union[str, bytes, bytearray], strict: bool = False, uts46: bool = False, std3_rules: bool = False, transitional: bool = False) -> bytes:
+def encode(
+    s: Union[str, bytes, bytearray],
+    strict: bool = False,
+    uts46: bool = False,
+    std3_rules: bool = False,
+    transitional: bool = False,
+) -> bytes:
     if not isinstance(s, str):
         try:
-            s = str(s, 'ascii')
+            s = str(s, "ascii")
         except UnicodeDecodeError:
-            raise IDNAError('should pass a unicode string to the function rather than a byte string.')
+            raise IDNAError("should pass a unicode string to the function rather than a byte string.")
     if uts46:
         s = uts46_remap(s, std3_rules, transitional)
     trailing_dot = False
     result = []
     if strict:
-        labels = s.split('.')
+        labels = s.split(".")
     else:
         labels = _unicode_dots_re.split(s)
-    if not labels or labels == ['']:
-        raise IDNAError('Empty domain')
-    if labels[-1] == '':
+    if not labels or labels == [""]:
+        raise IDNAError("Empty domain")
+    if labels[-1] == "":
         del labels[-1]
         trailing_dot = True
     for label in labels:
@@ -356,21 +393,26 @@ def encode(s: Union[str, bytes, bytearray], strict: bool = False, uts46: bool =
         if s:
             result.append(s)
         else:
-            raise IDNAError('Empty label')
+            raise IDNAError("Empty label")
     if trailing_dot:
-        result.append(b'')
-    s = b'.'.join(result)
+        result.append(b"")
+    s = b".".join(result)
     if not valid_string_length(s, trailing_dot):
-        raise IDNAError('Domain too long')
+        raise IDNAError("Domain too long")
     return s
-def decode(s: Union[str, bytes, bytearray], strict: bool = False, uts46: bool = False, std3_rules: bool = False) -> str:
+def decode(
+    s: Union[str, bytes, bytearray],
+    strict: bool = False,
+    uts46: bool = False,
+    std3_rules: bool = False,
+) -> str:
     try:
         if not isinstance(s, str):
-            s = str(s, 'ascii')
+            s = str(s, "ascii")
     except UnicodeDecodeError:
-        raise IDNAError('Invalid ASCII in A-label')
+        raise IDNAError("Invalid ASCII in A-label")
     if uts46:
         s = uts46_remap(s, std3_rules, False)
     trailing_dot = False
@@ -378,9 +420,9 @@ def decode(s: Union[str, bytes, bytearray], strict: bool = False, uts46: bool =
     if not strict:
         labels = _unicode_dots_re.split(s)
     else:
-        labels = s.split('.')
-    if not labels or labels == ['']:
-        raise IDNAError('Empty domain')
+        labels = s.split(".")
+    if not labels or labels == [""]:
+        raise IDNAError("Empty domain")
     if not labels[-1]:
         del labels[-1]
         trailing_dot = True
@@ -389,7 +431,7 @@ def decode(s: Union[str, bytes, bytearray], strict: bool = False, uts46: bool =
     if s:
         result.append(s)
     else:
-        raise IDNAError('Empty label')
+        raise IDNAError("Empty label")
     if trailing_dot:
-        result.append('')
-    return '.'.join(result)
+        result.append("")
+    return ".".join(result)
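
Apart from quote and layout churn, the visible behavioural change in this file is the new try/except ValueError in check_label, which converts a failed lookup inside valid_contextj into an IDNAError. Typical use of the public helpers is unchanged; a short sketch, with expected values taken from idna's documentation:

import idna

print(idna.alabel("测试"))          # b'xn--0zwm56d'
print(idna.ulabel(b"xn--0zwm56d"))  # 测试
# uts46=True applies UTS 46 remapping (e.g. case folding) before the IDNA 2008 checks
print(idna.encode("Königsgäßchen", uts46=True))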

idna/idnadata.py (diff suppressed: generated Unicode data tables, too large to display)

idna/intranges.py

@@ -8,6 +8,7 @@ in the original list?" in time O(log(# runs)).
 import bisect
 from typing import List, Tuple
 def intranges_from_list(list_: List[int]) -> Tuple[int, ...]:
     """Represent a list of integers as a sequence of ranges:
     ((start_0, end_0), (start_1, end_1), ...), such that the original
@@ -20,18 +21,20 @@ def intranges_from_list(list_: List[int]) -> Tuple[int, ...]:
     ranges = []
     last_write = -1
     for i in range(len(sorted_list)):
-        if i+1 < len(sorted_list):
-            if sorted_list[i] == sorted_list[i+1]-1:
+        if i + 1 < len(sorted_list):
+            if sorted_list[i] == sorted_list[i + 1] - 1:
                 continue
-        current_range = sorted_list[last_write+1:i+1]
+        current_range = sorted_list[last_write + 1 : i + 1]
         ranges.append(_encode_range(current_range[0], current_range[-1] + 1))
         last_write = i
     return tuple(ranges)
 def _encode_range(start: int, end: int) -> int:
     return (start << 32) | end
 def _decode_range(r: int) -> Tuple[int, int]:
     return (r >> 32), (r & ((1 << 32) - 1))
@@ -43,7 +46,7 @@ def intranges_contain(int_: int, ranges: Tuple[int, ...]) -> bool:
     # we could be immediately ahead of a tuple (start, end)
     # with start < int_ <= end
     if pos > 0:
-        left, right = _decode_range(ranges[pos-1])
+        left, right = _decode_range(ranges[pos - 1])
         if left <= int_ < right:
             return True
     # or we could be immediately behind a tuple (int_, end)
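
For reference, intranges_from_list packs each run of consecutive integers into a single int ((start << 32) | end) so that intranges_contain can answer membership with a bisect over a flat tuple; a small self-check, not part of the commit:

from idna.intranges import intranges_from_list, intranges_contain

ranges = intranges_from_list([1, 2, 3, 7, 8, 100])
assert intranges_contain(2, ranges)        # inside the run 1..3
assert intranges_contain(8, ranges)        # inside the run 7..8
assert not intranges_contain(50, ranges)   # between runs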

idna/package_data.py

@@ -1,2 +1 @@
-__version__ = '3.7'
+__version__ = "3.10"

idna/uts46data.py (diff suppressed: generated UTS 46 mapping table, too large to display)

requirements.txt

@@ -15,7 +15,7 @@ ga4mp==2.0.4
 gntp==1.0.3
 html5lib==1.1
 httpagentparser==1.9.5
-idna==3.7
+idna==3.10
 importlib-metadata==8.5.0
 importlib-resources==6.4.5
 git+https://github.com/Tautulli/ipwhois.git@master#egg=ipwhois
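
If useful, the new pin can be double-checked against an installed distribution at runtime (a small sketch, not part of this change, and only meaningful where idna is installed from this requirements file rather than bundled):

from importlib.metadata import version

assert version("idna") == "3.10"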