mirror of
https://github.com/clinton-hall/nzbToMedia.git
synced 2025-08-19 21:03:14 -07:00
Update vendored beets to 1.6.0
Updates colorama to 0.4.6; adds confuse 1.7.0; updates jellyfish to 0.9.0; adds mediafile 0.10.1; updates munkres to 1.1.4; updates musicbrainzngs to 0.7.1; updates mutagen to 1.46.0; updates pyyaml to 6.0; updates unidecode to 1.3.6.
This commit is contained in:
parent
5073ec0c6f
commit
56c6773c6b
385 changed files with 25143 additions and 18080 deletions
|
@ -3,35 +3,39 @@
|
|||
"""Transliterate Unicode text into plain 7-bit ASCII.
|
||||
|
||||
Example usage:
|
||||
|
||||
>>> from unidecode import unidecode
|
||||
>>> unidecode(u"\u5317\u4EB0")
|
||||
>>> unidecode("\u5317\u4EB0")
|
||||
"Bei Jing "
|
||||
|
||||
The transliteration uses a straightforward map, and doesn't have alternatives
|
||||
for the same character based on language, position, or anything else.
|
||||
|
||||
In Python 3, a standard string object will be returned. If you need bytes, use:
|
||||
A standard string object will be returned. If you need bytes, use:
|
||||
|
||||
>>> unidecode("Κνωσός").encode("ascii")
|
||||
b'Knosos'
|
||||
"""
|
||||
import warnings
|
||||
from sys import version_info
|
||||
from typing import Dict, Optional, Sequence
|
||||
|
||||
Cache = {}
|
||||
Cache = {} # type: Dict[int, Optional[Sequence[Optional[str]]]]
|
||||
|
||||
class UnidecodeError(ValueError):
    """Raised for Unidecode-related errors.

    Attributes:
        index: Index of the character that caused the error, or None when
            no index was supplied.
    """

    def __init__(self, message: str, index: Optional[int] = None) -> None:
        """Store the error message and the optional offending index."""
        super().__init__(message)
        self.index = index
|
||||
|
||||
|
||||
def _warn_if_not_unicode(string):
    """Emit a RuntimeWarning when a non-``unicode`` object is passed on Python 2.

    On Python 3 the interpreter-version check settles the matter first, so
    the argument is never inspected and no warning is issued.
    """
    on_python2 = version_info[0] < 3

    # ``unicode`` exists only on Python 2; the version test short-circuits
    # before the name is evaluated on newer interpreters.
    if not on_python2 or isinstance(string, unicode):
        return

    message = ("Argument %r is not an unicode object. "
               "Passing an encoded string will likely have "
               "unexpected results.") % (type(string),)
    # stacklevel=2 attributes the warning to the caller of this helper.
    warnings.warn(message, RuntimeWarning, 2)
|
||||
|
||||
|
||||
def unidecode_expect_ascii(string: str, errors: str = 'ignore', replace_str: str = '?') -> str:
    """Transliterate a Unicode object into an ASCII string.

    >>> unidecode("\u5317\u4EB0")
    "Bei Jing "

    This function first tries to convert the string using the ASCII codec.
    If that fails because non-ASCII characters are present, it falls back
    to transliteration using the character tables.

    This is approx. five times faster if the string only contains ASCII
    characters, but slightly slower than unidecode_expect_nonascii if
    non-ASCII characters are present.

    errors specifies what to do with characters that have not been
    found in replacement tables. The default is 'ignore' which ignores
    the character. 'strict' raises an UnidecodeError. 'replace'
    substitutes the character with replace_str (default is '?').
    'preserve' keeps the original character.

    Note that if 'preserve' is used the returned string might not be
    ASCII!
    """
    try:
        # Only the success or failure of the encoding matters here; the
        # encoded bytes themselves are not needed.
        string.encode('ASCII')
    except UnicodeEncodeError:
        # At least one non-ASCII character: transliterate the whole string.
        return _unidecode(string, errors, replace_str)

    # Pure ASCII input is already its own transliteration.
    return string
|
||||
|
||||
def unidecode_expect_nonascii(string: str, errors: str = 'ignore', replace_str: str = '?') -> str:
    """Transliterate a Unicode object into an ASCII string.

    >>> unidecode("\u5317\u4EB0")
    "Bei Jing "

    The string is handed directly to the table-based transliteration,
    without first attempting a plain ASCII encode. The errors and
    replace_str arguments are passed through unchanged.

    See unidecode_expect_ascii.
    """
    return _unidecode(string, errors, replace_str)
|
||||
|
||||
unidecode = unidecode_expect_ascii
|
||||
|
||||
def _unidecode(string):
|
||||
def _get_repl_str(char: str) -> Optional[str]:
    """Look up the ASCII replacement for a single character.

    Returns the replacement string, or None when no replacement data is
    available for the character.
    """
    code = ord(char)

    # Already ASCII
    if code < 0x80:
        return str(char)

    # No data on characters in Private Use Area and above.
    if code > 0xeffff:
        return None

    if 0xd800 <= code <= 0xdfff:
        warnings.warn(  "Surrogate character %r will be ignored. "
                        "You might be using a narrow Python build." % (char,),
                        RuntimeWarning, 2)

    # Everything above the last two hex digits selects the table module;
    # the last two hex digits select the entry inside that table.
    block_id, offset = divmod(code, 256)

    if block_id in Cache:
        table = Cache[block_id]
    else:
        try:
            mod = __import__('unidecode.x%03x'%(block_id), globals(), locals(), ['data'])
        except ImportError:
            # No data on this character; remember the miss so the import
            # is not attempted again for this section.
            Cache[block_id] = None
            return None

        table = mod.data
        Cache[block_id] = table

    if not table or offset >= len(table):
        return None
    return table[offset]
|
||||
|
||||
def _unidecode(string: str, errors: str, replace_str: str) -> str:
    """Transliterate string one character at a time and join the results.

    Each character is looked up with _get_repl_str. When that returns None,
    the errors argument decides what is emitted:

    * 'ignore'   -- the character is dropped.
    * 'strict'   -- an UnidecodeError carrying the character index is raised.
    * 'replace'  -- replace_str is emitted instead.
    * 'preserve' -- the original character is kept.

    Raises:
        UnidecodeError: in 'strict' mode when a character has no
            replacement, or when errors is not one of the values above.
            Note that errors is only examined once a character without a
            replacement is encountered.
    """
    retval = []

    for index, char in enumerate(string):
        repl = _get_repl_str(char)

        if repl is None:
            if errors == 'ignore':
                repl = ''
            elif errors == 'strict':
                raise UnidecodeError('no replacement found for character %r '
                                     'in position %d' % (char, index), index)
            elif errors == 'replace':
                repl = replace_str
            elif errors == 'preserve':
                repl = char
            else:
                raise UnidecodeError('invalid value for errors parameter %r' % (errors,))

        retval.append(repl)

    return ''.join(retval)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue