Update vendored beets to 1.6.0

Updates colorama to 0.4.6
Adds confuse version 1.7.0
Updates jellyfish to 0.9.0
Adds mediafile 0.10.1
Updates munkres to 1.1.4
Updates musicbrainzngs to 0.7.1
Updates mutagen to 1.46.0
Updates pyyaml to 6.0
Updates unidecode to 1.3.6
This commit is contained in:
Labrys of Knossos 2022-11-28 18:02:40 -05:00
commit 56c6773c6b
385 changed files with 25143 additions and 18080 deletions

View file

@ -3,35 +3,39 @@
"""Transliterate Unicode text into plain 7-bit ASCII.
Example usage:
>>> from unidecode import unidecode
>>> unidecode("\u5317\u4EB0")
"Bei Jing "
The transliteration uses a straightforward map, and doesn't have alternatives
for the same character based on language, position, or anything else.
A standard string object will be returned. If you need bytes, use:
>>> unidecode("Κνωσός").encode("ascii")
b'Knosos'
"""
import warnings
from sys import version_info
from typing import Dict, Optional, Sequence
# Cache of transliteration tables, keyed by section (codepoint >> 8).
# A value of None records that no table module could be imported for that
# section, so the import is not attempted again.
Cache = {} # type: Dict[int, Optional[Sequence[Optional[str]]]]
class UnidecodeError(ValueError):
    """Raised for Unidecode-related errors.

    The index attribute contains the index of the character that caused
    the error, or None when the error is not tied to a specific character.
    """

    def __init__(self, message: str, index: Optional[int] = None) -> None:
        super().__init__(message)
        self.index = index
def _warn_if_not_unicode(string):
    """Warn when a Python 2 caller passes something other than ``unicode``.

    On Python 3 this is a no-op: the function returns before the
    Python 2-only ``unicode`` name is ever evaluated.
    """
    if version_info[0] >= 3:
        return

    if not isinstance(string, unicode):  # noqa: F821 - Python 2 builtin
        # stacklevel=2 attributes the warning to the function that called
        # this helper rather than to the helper itself.
        warnings.warn(  "Argument %r is not an unicode object. "
                        "Passing an encoded string will likely have "
                        "unexpected results." % (type(string),),
                        RuntimeWarning, 2)
def unidecode_expect_ascii(string: str, errors: str = 'ignore', replace_str: str = '?') -> str:
    """Transliterate an Unicode object into an ASCII string

    >>> unidecode("\u5317\u4EB0")
    "Bei Jing "

    This function first tries to convert the string using ASCII codec.
    If that fails (because of non-ASCII characters), it falls back to
    transliteration using the character tables.

    This is approx. five times faster if the string only contains ASCII
    characters, but slightly slower than unidecode_expect_nonascii if
    non-ASCII characters are present.

    errors specifies what to do with characters that have not been
    found in replacement tables. The default is 'ignore' which ignores
    the character. 'strict' raises an UnidecodeError. 'replace'
    substitutes the character with replace_str (default is '?').
    'preserve' keeps the original character.

    Note that if 'preserve' is used the returned string might not be
    ASCII!
    """
    try:
        # Only the success or failure of the encode matters; the encoded
        # bytes themselves are not needed.
        string.encode('ASCII')
    except UnicodeEncodeError:
        # At least one non-ASCII character: use the table-driven path.
        return _unidecode(string, errors, replace_str)

    # Pure ASCII input is already its own transliteration.
    return string
def unidecode_expect_nonascii(string: str, errors: str = 'ignore', replace_str: str = '?') -> str:
    """Transliterate an Unicode object into an ASCII string

    >>> unidecode("\u5317\u4EB0")
    "Bei Jing "

    Unlike unidecode_expect_ascii, this goes straight to the character
    tables without first trying the ASCII codec.

    See unidecode_expect_ascii for the meaning of errors and replace_str.
    """
    return _unidecode(string, errors, replace_str)
# `unidecode` is an alias for the variant that tries the ASCII codec first.
unidecode = unidecode_expect_ascii
def _get_repl_str(char: str) -> Optional[str]:
    """Return the replacement string for a single character.

    ASCII characters are returned unchanged. For other characters the
    replacement is looked up in the per-section table modules
    (``unidecode.xNNN``), which are imported on first use and kept in
    Cache.

    Returns None when there is no data for the character: it lies above
    0xeffff, its section module cannot be imported, or the table is empty
    or too short to contain its position.
    """
    codepoint = ord(char)

    if codepoint < 0x80:
        # Already ASCII
        return str(char)

    if codepoint > 0xeffff:
        # No data on characters in Private Use Area and above.
        return None

    if 0xd800 <= codepoint <= 0xdfff:
        # stacklevel=4 skips this helper, _unidecode and the public
        # unidecode_expect_* wrapper so the warning is attributed to the
        # user's call site rather than to library code.
        warnings.warn(  "Surrogate character %r will be ignored. "
                        "You might be using a narrow Python build." % (char,),
                        RuntimeWarning, 4)

    section = codepoint >> 8   # Chop off the last two hex digits
    position = codepoint % 256 # Last two hex digits

    try:
        table = Cache[section]
    except KeyError:
        try:
            mod = __import__('unidecode.x%03x'%(section), globals(), locals(), ['data'])
        except ImportError:
            # No data on this character; remember the miss so the import
            # is not retried for every character in this section.
            Cache[section] = None
            return None

        Cache[section] = table = mod.data

    if table and len(table) > position:
        return table[position]

    return None
def _unidecode(string: str, errors: str, replace_str:str) -> str:
    """Transliterate string one character at a time.

    Each character is mapped through _get_repl_str. When that returns
    None, errors selects the fallback: 'ignore' drops the character,
    'strict' raises UnidecodeError carrying the character's index,
    'replace' substitutes replace_str and 'preserve' keeps the original
    character. Any other value raises UnidecodeError.

    The errors value is only inspected when a character has no
    replacement, so an invalid value goes unnoticed for input that
    transliterates completely.
    """
    retval = []

    for index, char in enumerate(string):
        repl = _get_repl_str(char)

        if repl is None:
            if errors == 'ignore':
                repl = ''
            elif errors == 'strict':
                raise UnidecodeError('no replacement found for character %r '
                        'in position %d' % (char, index), index)
            elif errors == 'replace':
                repl = replace_str
            elif errors == 'preserve':
                repl = char
            else:
                raise UnidecodeError('invalid value for errors parameter %r' % (errors,))

        retval.append(repl)

    return ''.join(retval)