Move common libs to libs/common

2025-08-19 21:03:14 -07:00 · 2018-12-16 13:30:24 -05:00 · 2018-12-16 13:30:24 -05:00 · 1f4bd41bcc
commit 1f4bd41bcc
parent 8dbb1a2451
1612 changed files with 962 additions and 10 deletions
--- a/libs/common/unidecode/init.py
+++ b/libs/common/unidecode/init.py
@ -0,0 +1,103 @@
+# -*- coding: utf-8 -*-
+# vi:tabstop=4:expandtab:sw=4
+"""Transliterate Unicode text into plain 7-bit ASCII.
+
+Example usage:
+>>> from unidecode import unidecode
+>>> unidecode(u"\u5317\u4EB0")
+"Bei Jing "
+
+The transliteration uses a straightforward map, and doesn't have alternatives
+for the same character based on language, position, or anything else.
+
+In Python 3, a standard string object will be returned. If you need bytes, use:
+>>> unidecode("Κνωσός").encode("ascii")
+b'Knosos'
+"""
+import warnings
+from sys import version_info
+
+Cache = {}
+
+
+def _warn_if_not_unicode(string):
+    if version_info[0] < 3 and not isinstance(string, unicode):
+        warnings.warn(  "Argument %r is not an unicode object. "
+                        "Passing an encoded string will likely have "
+                        "unexpected results." % (type(string),),
+                        RuntimeWarning, 2)
+
+
+def unidecode_expect_ascii(string):
+    """Transliterate an Unicode object into an ASCII string
+
+    >>> unidecode(u"\u5317\u4EB0")
+    "Bei Jing "
+
+    This function first tries to convert the string using ASCII codec.
+    If it fails (because of non-ASCII characters), it falls back to
+    transliteration using the character tables.
+
+    This is approx. five times faster if the string only contains ASCII
+    characters, but slightly slower than using unidecode directly if non-ASCII
+    chars are present.
+    """
+
+    _warn_if_not_unicode(string)
+    try:
+        bytestring = string.encode('ASCII')
+    except UnicodeEncodeError:
+        return _unidecode(string)
+    if version_info[0] >= 3:
+        return string
+    else:
+        return bytestring
+
+def unidecode_expect_nonascii(string):
+    """Transliterate an Unicode object into an ASCII string
+
+    >>> unidecode(u"\u5317\u4EB0")
+    "Bei Jing "
+    """
+
+    _warn_if_not_unicode(string)
+    return _unidecode(string)
+
+unidecode = unidecode_expect_ascii
+
+def _unidecode(string):
+    retval = []
+
+    for char in string:
+        codepoint = ord(char)
+
+        if codepoint < 0x80: # Basic ASCII
+            retval.append(str(char))
+            continue
+        
+        if codepoint > 0xeffff:
+            continue # Characters in Private Use Area and above are ignored
+
+        if 0xd800 <= codepoint <= 0xdfff:
+            warnings.warn(  "Surrogate character %r will be ignored. "
+                            "You might be using a narrow Python build." % (char,),
+                            RuntimeWarning, 2)
+
+        section = codepoint >> 8   # Chop off the last two hex digits
+        position = codepoint % 256 # Last two hex digits
+
+        try:
+            table = Cache[section]
+        except KeyError:
+            try:
+                mod = __import__('unidecode.x%03x'%(section), globals(), locals(), ['data'])
+            except ImportError:
+                Cache[section] = None
+                continue   # No match: ignore this character and carry on.
+
+            Cache[section] = table = mod.data
+
+        if table and len(table) > position:
+            retval.append( table[position] )
+
+    return ''.join(retval)